rag-lite-ts 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/README.md +88 -5
  2. package/dist/cjs/cli/indexer.js +73 -15
  3. package/dist/cjs/cli/ui-server.d.ts +5 -0
  4. package/dist/cjs/cli/ui-server.js +152 -0
  5. package/dist/cjs/cli.js +25 -6
  6. package/dist/cjs/core/binary-index-format.js +6 -3
  7. package/dist/cjs/core/db.d.ts +56 -0
  8. package/dist/cjs/core/db.js +105 -0
  9. package/dist/cjs/core/ingestion.js +3 -0
  10. package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
  11. package/dist/cjs/core/knowledge-base-manager.js +256 -0
  12. package/dist/cjs/core/search-pipeline.js +1 -1
  13. package/dist/cjs/core/search.js +1 -1
  14. package/dist/cjs/core/vector-index-messages.d.ts +52 -0
  15. package/dist/cjs/core/vector-index-messages.js +5 -0
  16. package/dist/cjs/core/vector-index-worker.d.ts +6 -0
  17. package/dist/cjs/core/vector-index-worker.js +304 -0
  18. package/dist/cjs/core/vector-index.d.ts +45 -10
  19. package/dist/cjs/core/vector-index.js +229 -218
  20. package/dist/cjs/factories/ingestion-factory.js +3 -7
  21. package/dist/cjs/factories/search-factory.js +11 -0
  22. package/dist/cjs/index-manager.d.ts +23 -3
  23. package/dist/cjs/index-manager.js +84 -15
  24. package/dist/cjs/index.d.ts +2 -1
  25. package/dist/cjs/index.js +3 -1
  26. package/dist/esm/cli/indexer.js +73 -15
  27. package/dist/esm/cli/ui-server.d.ts +5 -0
  28. package/dist/esm/cli/ui-server.js +152 -0
  29. package/dist/esm/cli.js +25 -6
  30. package/dist/esm/core/binary-index-format.js +6 -3
  31. package/dist/esm/core/db.d.ts +56 -0
  32. package/dist/esm/core/db.js +105 -0
  33. package/dist/esm/core/ingestion.js +3 -0
  34. package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
  35. package/dist/esm/core/knowledge-base-manager.js +256 -0
  36. package/dist/esm/core/search-pipeline.js +1 -1
  37. package/dist/esm/core/search.js +1 -1
  38. package/dist/esm/core/vector-index-messages.d.ts +52 -0
  39. package/dist/esm/core/vector-index-messages.js +5 -0
  40. package/dist/esm/core/vector-index-worker.d.ts +6 -0
  41. package/dist/esm/core/vector-index-worker.js +304 -0
  42. package/dist/esm/core/vector-index.d.ts +45 -10
  43. package/dist/esm/core/vector-index.js +229 -218
  44. package/dist/esm/factories/ingestion-factory.js +3 -7
  45. package/dist/esm/factories/search-factory.js +11 -0
  46. package/dist/esm/index-manager.d.ts +23 -3
  47. package/dist/esm/index-manager.js +84 -15
  48. package/dist/esm/index.d.ts +2 -1
  49. package/dist/esm/index.js +3 -1
  50. package/package.json +14 -7
@@ -1,39 +1,22 @@
1
1
  /**
2
2
  * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
3
  * Model-agnostic. No transformer or modality-specific logic.
4
+ *
5
+ * Worker-based implementation to prevent WebAssembly memory accumulation.
4
6
  */
7
+ import { Worker } from 'worker_threads';
5
8
  import { existsSync } from 'fs';
6
- import { JSDOM } from 'jsdom';
7
- import { ErrorCategory, ErrorSeverity, safeExecute } from './error-handler.js';
9
+ import { fileURLToPath } from 'url';
10
+ import { dirname, join } from 'path';
11
+ import { handleError, ErrorCategory, ErrorSeverity, createError } from './error-handler.js';
8
12
  import { createMissingFileError, createDimensionMismatchError } from './actionable-error-messages.js';
9
- import { BinaryIndexFormat } from './binary-index-format.js';
10
- // Set up browser-like environment for hnswlib-wasm
11
- if (typeof window === 'undefined') {
12
- const dom = new JSDOM('<!DOCTYPE html><html><body></body></html>', {
13
- url: 'http://localhost',
14
- pretendToBeVisual: true,
15
- resources: 'usable'
16
- });
17
- // Type assertion to avoid TypeScript issues with global polyfills
18
- global.window = dom.window;
19
- global.document = dom.window.document;
20
- global.XMLHttpRequest = dom.window.XMLHttpRequest;
21
- // Disable IndexedDB to prevent hnswlib-wasm from trying to use it
22
- global.indexedDB = undefined;
23
- // Override indexedDB on the window object to return undefined
24
- Object.defineProperty(dom.window, 'indexedDB', {
25
- value: undefined,
26
- writable: false,
27
- configurable: true
28
- });
29
- }
30
13
  export class VectorIndex {
31
- index = null;
32
- hnswlib = null;
14
+ worker = null;
33
15
  indexPath;
34
16
  options;
35
- currentSize = 0;
36
- vectorStorage = new Map(); // For persistence
17
+ messageQueue = new Map();
18
+ messageId = 0;
19
+ isInitialized = false;
37
20
  constructor(indexPath, options) {
38
21
  this.indexPath = indexPath;
39
22
  this.options = {
@@ -44,62 +27,138 @@ export class VectorIndex {
44
27
  };
45
28
  }
46
29
  /**
47
- * Initialize the HNSW index with cosine similarity using hnswlib-wasm
30
+ * Get the path to the worker script
31
+ * Always uses compiled .js files - workers cannot execute TypeScript directly
48
32
  */
49
- async initialize() {
50
- await safeExecute(async () => {
51
- // Load the hnswlib module
52
- if (!this.hnswlib) {
53
- // Temporarily suppress stderr output during hnswlib loading to avoid IndexedDB warnings
54
- const originalStderrWrite = process.stderr.write;
55
- const originalConsoleError = console.error;
56
- process.stderr.write = function (chunk, encoding, callback) {
57
- const message = chunk.toString();
58
- // Suppress specific IndexedDB/IDBFS related errors and WebAssembly errors
59
- if (message.includes('IDBFS') || message.includes('indexedDB not supported') ||
60
- message.includes('EmscriptenFileSystemManager') || message.includes('Aborted') ||
61
- message.includes('jsFS Error') || message.includes('syncing FS') ||
62
- message.includes('RuntimeError: unreachable') || message.includes('___trap') ||
63
- message.includes('abort') || message.includes('assert') ||
64
- message.includes('hnswlib-wasm/dist/hnswlib')) {
65
- if (callback)
66
- callback();
67
- return true;
68
- }
69
- return originalStderrWrite.call(this, chunk, encoding, callback);
70
- };
71
- console.error = (...args) => {
72
- const message = args.join(' ');
73
- if (message.includes('IDBFS') || message.includes('indexedDB not supported') ||
74
- message.includes('EmscriptenFileSystemManager') || message.includes('Aborted') ||
75
- message.includes('jsFS Error') || message.includes('syncing FS') ||
76
- message.includes('RuntimeError: unreachable') || message.includes('___trap') ||
77
- message.includes('abort') || message.includes('assert') ||
78
- message.includes('hnswlib-wasm/dist/hnswlib')) {
79
- return;
80
- }
81
- originalConsoleError.apply(console, args);
82
- };
83
- try {
84
- const hnswlibModule = await import('hnswlib-wasm/dist/hnswlib.js');
85
- const { loadHnswlib } = hnswlibModule;
86
- this.hnswlib = await loadHnswlib();
33
+ getWorkerPath() {
34
+ const currentFile = fileURLToPath(import.meta.url);
35
+ const currentDir = dirname(currentFile);
36
+ // Always prefer .js (compiled output)
37
+ const jsPath = join(currentDir, 'vector-index-worker.js');
38
+ // Check if .js exists in current directory (compiled)
39
+ if (existsSync(jsPath)) {
40
+ return jsPath;
41
+ }
42
+ // If running from src/ (development), try dist/ paths
43
+ if (currentDir.includes('src')) {
44
+ // Find project root (go up from src/core)
45
+ const projectRoot = currentDir.replace(/[\\/]src[\\/]core.*$/, '');
46
+ const distEsmPath = join(projectRoot, 'dist', 'esm', 'core', 'vector-index-worker.js');
47
+ const distCjsPath = join(projectRoot, 'dist', 'cjs', 'core', 'vector-index-worker.js');
48
+ if (existsSync(distEsmPath)) {
49
+ return distEsmPath;
50
+ }
51
+ if (existsSync(distCjsPath)) {
52
+ return distCjsPath;
53
+ }
54
+ }
55
+ // If running from node_modules (installed package), try dist paths
56
+ if (currentDir.includes('node_modules')) {
57
+ const packageRoot = currentDir.split('node_modules')[0];
58
+ const distEsmPath = join(packageRoot, 'node_modules', 'rag-lite-ts', 'dist', 'esm', 'core', 'vector-index-worker.js');
59
+ const distCjsPath = join(packageRoot, 'node_modules', 'rag-lite-ts', 'dist', 'cjs', 'core', 'vector-index-worker.js');
60
+ if (existsSync(distEsmPath)) {
61
+ return distEsmPath;
62
+ }
63
+ if (existsSync(distCjsPath)) {
64
+ return distCjsPath;
65
+ }
66
+ }
67
+ // Final fallback - will fail with clear error
68
+ throw new Error(`Worker file not found. Expected: ${jsPath}\n` +
69
+ 'Please run "npm run build" to compile the vector-index-worker.ts file.\n' +
70
+ `Current directory: ${currentDir}\n` +
71
+ `Checked paths: ${jsPath}, ${currentDir.includes('src') ? join(currentDir.replace(/[\\/]src[\\/]core.*$/, ''), 'dist', 'esm', 'core', 'vector-index-worker.js') : 'N/A'}`);
72
+ }
73
+ /**
74
+ * Ensure worker is created and ready
75
+ */
76
+ async ensureWorker() {
77
+ if (this.worker) {
78
+ return;
79
+ }
80
+ const workerPath = this.getWorkerPath();
81
+ this.worker = new Worker(workerPath);
82
+ // Set up message handler
83
+ this.worker.on('message', (response) => {
84
+ const handler = this.messageQueue.get(response.id);
85
+ if (handler) {
86
+ this.messageQueue.delete(response.id);
87
+ if (response.type === 'error') {
88
+ handler.reject(new Error(response.error || 'Unknown error'));
87
89
  }
88
- finally {
89
- // Restore original output streams
90
- process.stderr.write = originalStderrWrite;
91
- console.error = originalConsoleError;
90
+ else {
91
+ handler.resolve(response.payload);
92
92
  }
93
93
  }
94
- // Create new HNSW index (third parameter is autoSaveFilename, but we'll handle persistence manually)
95
- this.index = new this.hnswlib.HierarchicalNSW('cosine', this.options.dimensions, '');
96
- this.index.initIndex(this.options.maxElements, this.options.M || 16, this.options.efConstruction || 200, this.options.seed || 100);
97
- this.currentSize = 0;
98
- console.log(`Initialized HNSW index with ${this.options.dimensions} dimensions using hnswlib-wasm`);
99
- }, 'Vector Index Initialization', {
100
- category: ErrorCategory.INDEX,
101
- severity: ErrorSeverity.FATAL
102
94
  });
95
+ // Handle worker errors
96
+ this.worker.on('error', (error) => {
97
+ console.error('VectorIndex worker error:', error);
98
+ // Reject all pending requests
99
+ for (const [id, handler] of this.messageQueue.entries()) {
100
+ handler.reject(error);
101
+ }
102
+ this.messageQueue.clear();
103
+ });
104
+ // Handle worker exit
105
+ this.worker.on('exit', (code) => {
106
+ if (code !== 0) {
107
+ console.error(`VectorIndex worker exited with code ${code}`);
108
+ }
109
+ // Reject all pending requests
110
+ for (const [id, handler] of this.messageQueue.entries()) {
111
+ handler.reject(new Error(`Worker exited with code ${code}`));
112
+ }
113
+ this.messageQueue.clear();
114
+ this.worker = null;
115
+ this.isInitialized = false;
116
+ });
117
+ }
118
+ /**
119
+ * Send a message to the worker and wait for response
120
+ */
121
+ async sendMessage(type, payload) {
122
+ await this.ensureWorker();
123
+ return new Promise((resolve, reject) => {
124
+ const id = this.messageId++;
125
+ this.messageQueue.set(id, { resolve, reject });
126
+ const request = { id, type, payload };
127
+ this.worker.postMessage(request);
128
+ });
129
+ }
130
+ /**
131
+ * Convert Float32Array to ArrayBuffer for transfer
132
+ */
133
+ float32ArrayToBuffer(vector) {
134
+ const buffer = vector.buffer.slice(vector.byteOffset, vector.byteOffset + vector.byteLength);
135
+ // Ensure we return ArrayBuffer, not SharedArrayBuffer
136
+ return buffer instanceof ArrayBuffer ? buffer : new ArrayBuffer(0);
137
+ }
138
+ /**
139
+ * Initialize the HNSW index with cosine similarity using hnswlib-wasm
140
+ */
141
+ async initialize() {
142
+ try {
143
+ const payload = {
144
+ dimensions: this.options.dimensions,
145
+ maxElements: this.options.maxElements,
146
+ M: this.options.M,
147
+ efConstruction: this.options.efConstruction,
148
+ seed: this.options.seed,
149
+ indexPath: this.indexPath // Pass indexPath to worker for saveIndex operations
150
+ };
151
+ await this.sendMessage('init', payload);
152
+ this.isInitialized = true;
153
+ console.log(`Initialized HNSW index with ${this.options.dimensions} dimensions using hnswlib-wasm (worker)`);
154
+ }
155
+ catch (error) {
156
+ handleError(createError.index(`Failed to initialize vector index: ${error instanceof Error ? error.message : String(error)}`), 'Vector Index Initialization', {
157
+ category: ErrorCategory.INDEX,
158
+ severity: ErrorSeverity.FATAL
159
+ });
160
+ throw error;
161
+ }
103
162
  }
104
163
  /**
105
164
  * Load existing index from file using hnswlib-wasm
@@ -111,80 +170,12 @@ export class VectorIndex {
111
170
  });
112
171
  }
113
172
  try {
114
- // Load the hnswlib module
115
- if (!this.hnswlib) {
116
- // Temporarily suppress stderr output during hnswlib loading to avoid IndexedDB warnings
117
- const originalStderrWrite = process.stderr.write;
118
- const originalConsoleError = console.error;
119
- process.stderr.write = function (chunk, encoding, callback) {
120
- const message = chunk.toString();
121
- // Suppress specific IndexedDB/IDBFS related errors and WebAssembly errors
122
- if (message.includes('IDBFS') || message.includes('indexedDB not supported') ||
123
- message.includes('EmscriptenFileSystemManager') || message.includes('Aborted') ||
124
- message.includes('jsFS Error') || message.includes('syncing FS') ||
125
- message.includes('RuntimeError: unreachable') || message.includes('___trap') ||
126
- message.includes('abort') || message.includes('assert') ||
127
- message.includes('hnswlib-wasm/dist/hnswlib')) {
128
- if (callback)
129
- callback();
130
- return true;
131
- }
132
- return originalStderrWrite.call(this, chunk, encoding, callback);
133
- };
134
- console.error = (...args) => {
135
- const message = args.join(' ');
136
- if (message.includes('IDBFS') || message.includes('indexedDB not supported') ||
137
- message.includes('EmscriptenFileSystemManager') || message.includes('Aborted') ||
138
- message.includes('jsFS Error') || message.includes('syncing FS') ||
139
- message.includes('RuntimeError: unreachable') || message.includes('___trap') ||
140
- message.includes('abort') || message.includes('assert') ||
141
- message.includes('hnswlib-wasm/dist/hnswlib')) {
142
- return;
143
- }
144
- originalConsoleError.apply(console, args);
145
- };
146
- try {
147
- const hnswlibModule = await import('hnswlib-wasm/dist/hnswlib.js');
148
- const { loadHnswlib } = hnswlibModule;
149
- this.hnswlib = await loadHnswlib();
150
- }
151
- finally {
152
- // Restore original output streams
153
- process.stderr.write = originalStderrWrite;
154
- console.error = originalConsoleError;
155
- }
156
- }
157
- // Create new HNSW index (third parameter is autoSaveFilename, but we'll handle persistence manually)
158
- this.index = new this.hnswlib.HierarchicalNSW('cosine', this.options.dimensions, '');
159
- // Load from binary format
160
- const data = await BinaryIndexFormat.load(this.indexPath);
161
- // Validate dimensions
162
- if (data.dimensions !== this.options.dimensions) {
163
- console.log(`⚠️ Dimension mismatch detected:`);
164
- console.log(` Stored dimensions: ${data.dimensions}`);
165
- console.log(` Expected dimensions: ${this.options.dimensions}`);
166
- console.log(` Number of vectors: ${data.vectors.length}`);
167
- if (data.vectors.length > 0) {
168
- console.log(` Actual vector length: ${data.vectors[0].vector.length}`);
169
- }
170
- throw createDimensionMismatchError(this.options.dimensions, data.dimensions, 'vector index loading', { operationContext: 'VectorIndex.loadIndex' });
171
- }
172
- // Update options from stored data
173
- this.options.maxElements = data.maxElements;
174
- this.options.M = data.M;
175
- this.options.efConstruction = data.efConstruction;
176
- this.options.seed = data.seed;
177
- // Initialize HNSW index
178
- this.index.initIndex(this.options.maxElements, this.options.M, this.options.efConstruction, this.options.seed);
179
- // Clear and repopulate vector storage
180
- this.vectorStorage.clear();
181
- // Add all stored vectors to HNSW index
182
- for (const item of data.vectors) {
183
- this.index.addPoint(item.vector, item.id, false);
184
- this.vectorStorage.set(item.id, item.vector);
185
- }
186
- this.currentSize = data.currentSize;
187
- console.log(`✓ Loaded HNSW index with ${this.currentSize} vectors from ${this.indexPath}`);
173
+ const payload = {
174
+ indexPath: this.indexPath
175
+ };
176
+ const result = await this.sendMessage('loadIndex', payload);
177
+ this.isInitialized = true;
178
+ console.log(`✓ Loaded HNSW index with ${result.count} vectors from ${this.indexPath} (worker)`);
188
179
  }
189
180
  catch (error) {
190
181
  throw new Error(`Failed to load index from ${this.indexPath}: ${error}`);
@@ -194,26 +185,13 @@ export class VectorIndex {
194
185
  * Save index to binary format
195
186
  */
196
187
  async saveIndex() {
197
- if (!this.index) {
188
+ if (!this.isInitialized) {
198
189
  throw new Error('Index not initialized');
199
190
  }
200
191
  try {
201
- // Collect all vectors from storage
202
- const vectors = Array.from(this.vectorStorage.entries()).map(([id, vector]) => ({
203
- id,
204
- vector
205
- }));
206
- // Save to binary format
207
- await BinaryIndexFormat.save(this.indexPath, {
208
- dimensions: this.options.dimensions,
209
- maxElements: this.options.maxElements,
210
- M: this.options.M || 16,
211
- efConstruction: this.options.efConstruction || 200,
212
- seed: this.options.seed || 100,
213
- currentSize: this.currentSize,
214
- vectors
215
- });
216
- console.log(`✓ Saved HNSW index with ${this.currentSize} vectors to ${this.indexPath}`);
192
+ const result = await this.sendMessage('saveIndex');
193
+ const actualSize = result.count;
194
+ console.log(`✓ Saved HNSW index with ${actualSize} vectors (${(actualSize * this.options.dimensions * 4 / 1024).toFixed(2)} KB of vector data) to ${this.indexPath} (worker)`);
217
195
  }
218
196
  catch (error) {
219
197
  throw new Error(`Failed to save index to ${this.indexPath}: ${error}`);
@@ -221,84 +199,91 @@ export class VectorIndex {
221
199
  }
222
200
  /**
223
201
  * Add a single vector to the HNSW index
202
+ * Now async due to worker-based implementation
224
203
  */
225
- addVector(embeddingId, vector) {
226
- if (!this.index) {
204
+ async addVector(embeddingId, vector) {
205
+ if (!this.isInitialized) {
227
206
  throw new Error('Index not initialized');
228
207
  }
229
208
  if (vector.length !== this.options.dimensions) {
230
209
  throw createDimensionMismatchError(this.options.dimensions, vector.length, 'vector addition', { operationContext: 'VectorIndex.addVector' });
231
210
  }
232
- try {
233
- this.index.addPoint(vector, embeddingId, false);
234
- // Store vector for persistence
235
- this.vectorStorage.set(embeddingId, new Float32Array(vector));
236
- this.currentSize++;
237
- }
238
- catch (error) {
239
- throw new Error(`Failed to add vector ${embeddingId}: ${error}`);
240
- }
211
+ const payload = {
212
+ id: embeddingId,
213
+ vector: this.float32ArrayToBuffer(vector),
214
+ dimensions: vector.length
215
+ };
216
+ await this.sendMessage('addVector', payload);
241
217
  }
242
218
  /**
243
219
  * Add multiple vectors to the index in batch
220
+ * Now async due to worker-based implementation
244
221
  */
245
- addVectors(vectors) {
246
- for (const { id, vector } of vectors) {
247
- this.addVector(id, vector);
222
+ async addVectors(vectors) {
223
+ if (!this.isInitialized) {
224
+ throw new Error('Index not initialized');
248
225
  }
226
+ const payload = {
227
+ vectors: vectors.map(v => ({
228
+ id: v.id,
229
+ vector: this.float32ArrayToBuffer(v.vector),
230
+ dimensions: v.vector.length
231
+ }))
232
+ };
233
+ await this.sendMessage('addVectors', payload);
249
234
  }
250
235
  /**
251
236
  * Search for k nearest neighbors using hnswlib-wasm
237
+ * Now async due to worker-based implementation
252
238
  */
253
- search(queryVector, k = 5) {
254
- if (!this.index) {
239
+ async search(queryVector, k = 5) {
240
+ if (!this.isInitialized) {
255
241
  throw new Error('Index not initialized');
256
242
  }
257
243
  if (queryVector.length !== this.options.dimensions) {
258
244
  throw createDimensionMismatchError(this.options.dimensions, queryVector.length, 'vector search', { operationContext: 'VectorIndex.search' });
259
245
  }
260
- if (this.currentSize === 0) {
246
+ const payload = {
247
+ queryVector: this.float32ArrayToBuffer(queryVector),
248
+ dimensions: queryVector.length,
249
+ k
250
+ };
251
+ const result = await this.sendMessage('search', payload);
252
+ // Check if empty result
253
+ if (result.neighbors.length === 0 && result.distances.length === 0) {
261
254
  return { neighbors: [], distances: [] };
262
255
  }
263
- try {
264
- const result = this.index.searchKnn(queryVector, Math.min(k, this.currentSize), undefined);
265
- return {
266
- neighbors: result.neighbors,
267
- distances: result.distances
268
- };
269
- }
270
- catch (error) {
271
- throw new Error(`Search failed: ${error}`);
272
- }
256
+ return result;
273
257
  }
274
258
  /**
275
259
  * Get current number of vectors in the index
260
+ * Now async due to worker-based implementation
276
261
  */
277
- getCurrentCount() {
278
- return this.currentSize;
262
+ async getCurrentCount() {
263
+ if (!this.isInitialized) {
264
+ return 0;
265
+ }
266
+ const result = await this.sendMessage('getCurrentCount');
267
+ return result.count;
279
268
  }
280
269
  /**
281
270
  * Check if index exists on disk
282
271
  */
283
272
  indexExists() {
273
+ // This can be synchronous since it's just a file system check
284
274
  return existsSync(this.indexPath);
285
275
  }
286
276
  /**
287
277
  * Set search parameters for query time
278
+ * Now async due to worker-based implementation
288
279
  */
289
- setEf(ef) {
290
- if (!this.index) {
280
+ async setEf(ef) {
281
+ if (!this.isInitialized) {
291
282
  throw new Error('Index not initialized');
292
283
  }
284
+ const payload = { ef };
293
285
  try {
294
- // hnswlib-wasm might not have setEf method, check if it exists
295
- if (typeof this.index.setEfSearch === 'function') {
296
- this.index.setEfSearch(ef);
297
- console.log(`Set efSearch to ${ef}`);
298
- }
299
- else {
300
- console.log(`setEfSearch not available in hnswlib-wasm`);
301
- }
286
+ await this.sendMessage('setEf', payload);
302
287
  }
303
288
  catch (error) {
304
289
  console.log(`Failed to set ef: ${error}`);
@@ -306,22 +291,29 @@ export class VectorIndex {
306
291
  }
307
292
  /**
308
293
  * Resize index to accommodate more vectors
294
+ * Now async due to worker-based implementation
309
295
  */
310
- resizeIndex(newMaxElements) {
311
- if (!this.index) {
296
+ async resizeIndex(newMaxElements) {
297
+ if (!this.isInitialized) {
312
298
  throw new Error('Index not initialized');
313
299
  }
314
300
  if (newMaxElements <= this.options.maxElements) {
315
301
  throw new Error(`New max elements (${newMaxElements}) must be greater than current (${this.options.maxElements})`);
316
302
  }
317
- try {
318
- this.index.resizeIndex(newMaxElements);
319
- this.options.maxElements = newMaxElements;
320
- console.log(`Resized index to accommodate ${newMaxElements} vectors`);
321
- }
322
- catch (error) {
323
- throw new Error(`Failed to resize index: ${error}`);
324
- }
303
+ const payload = { newMaxElements };
304
+ await this.sendMessage('resizeIndex', payload);
305
+ this.options.maxElements = newMaxElements;
306
+ console.log(`Resized index to accommodate ${newMaxElements} vectors`);
307
+ }
308
+ /**
309
+ * Reset the vector index to an empty state.
310
+ * Clears all vectors from the HNSW graph and vectorStorage.
311
+ * The index parameters (dimensions, M, efConstruction) are preserved.
312
+ */
313
+ async reset() {
314
+ console.log('🔄 VectorIndex: Resetting to empty state...');
315
+ await this.sendMessage('reset');
316
+ console.log('✓ VectorIndex reset: cleared all vectors');
325
317
  }
326
318
  /**
327
319
  * Get index options (for external access to configuration)
@@ -329,5 +321,24 @@ export class VectorIndex {
329
321
  getOptions() {
330
322
  return { ...this.options };
331
323
  }
324
+ /**
325
+ * Cleanup: terminate worker and free all WebAssembly memory
326
+ */
327
+ async cleanup() {
328
+ if (this.worker) {
329
+ try {
330
+ // Send cleanup message (worker will acknowledge)
331
+ await this.sendMessage('cleanup');
332
+ }
333
+ catch (error) {
334
+ // Ignore errors during cleanup
335
+ }
336
+ // Terminate worker - this frees ALL WebAssembly memory
337
+ await this.worker.terminate();
338
+ this.worker = null;
339
+ this.isInitialized = false;
340
+ this.messageQueue.clear();
341
+ }
342
+ }
332
343
  }
333
344
  //# sourceMappingURL=vector-index.js.map
@@ -296,8 +296,7 @@ export class IngestionFactory {
296
296
  // Preserve custom error messages for model mismatch and mode mismatch
297
297
  if (error instanceof Error && (error.message.includes('Model mismatch') ||
298
298
  error.message.includes('Mode mismatch') ||
299
- error.message.includes('--force-rebuild') ||
300
- error.message.includes('--rebuild-if-needed'))) {
299
+ error.message.includes('--force-rebuild'))) {
301
300
  throw error; // Re-throw custom validation errors as-is
302
301
  }
303
302
  throw createFactoryCreationError('IngestionFactory', error instanceof Error ? error.message : 'Unknown error', { operationContext: 'ingestion pipeline creation' });
@@ -366,13 +365,10 @@ export class IngestionFactory {
366
365
  `āŒ Model mismatch: Database is configured for '${existingSystemInfo.modelName}', but '${effectiveModel}' was requested.`,
367
366
  '',
368
367
  '🛠️ How to fix this:',
369
- ' 1. Use --force-rebuild to change models:',
368
+ ' 1. Use --force-rebuild to rebuild from scratch:',
370
369
  ' raglite ingest <path> --model ' + effectiveModel + ' --force-rebuild',
371
370
  '',
372
- ' 2. Or use --rebuild-if-needed for automatic handling:',
373
- ' raglite ingest <path> --model ' + effectiveModel + ' --rebuild-if-needed',
374
- '',
375
- ' 3. Or continue using the existing model:',
371
+ ' 2. Or continue using the existing model:',
376
372
  ' raglite ingest <path> # Uses ' + existingSystemInfo.modelName,
377
373
  '',
378
374
  '🔍 Model switching requires rebuilding the vector index because different models',
@@ -281,6 +281,17 @@ export class SearchFactory {
281
281
  enhancedMessage += '\n The index was created with a different model. Rebuild the index:';
282
282
  enhancedMessage += '\n raglite ingest <directory> --force-rebuild';
283
283
  }
284
+ else if (error.message.includes('Cannot enlarge memory') ||
285
+ error.message.includes('WebAssembly memory limit') ||
286
+ error.message.includes('memory limit exceeded')) {
287
+ enhancedMessage += '\n\n💡 WebAssembly memory limit exceeded.';
288
+ enhancedMessage += '\n Your vector index is too large for the 2GB WebAssembly memory limit.';
289
+ enhancedMessage += '\n Solutions:';
290
+ enhancedMessage += '\n 1. Increase Node.js memory: node --max-old-space-size=4096 ...';
291
+ enhancedMessage += '\n 2. Split your data into smaller indexes';
292
+ enhancedMessage += '\n 3. Use a smaller embedding model (fewer dimensions)';
293
+ enhancedMessage += '\n 4. Rebuild the index with fewer vectors';
294
+ }
284
295
  return new Error(enhancedMessage);
285
296
  }
286
297
  return new Error(`SearchFactory.create failed: Unknown error`);
@@ -86,11 +86,12 @@ export declare class IndexManager {
86
86
  saveGroupedIndex(textEmbeddings: EmbeddingResult[], imageEmbeddings: EmbeddingResult[]): Promise<void>;
87
87
  /**
88
88
  * Search for similar vectors
89
+ * Now async due to worker-based VectorIndex implementation
89
90
  */
90
- search(queryVector: Float32Array, k?: number, contentType?: 'text' | 'image' | 'combined'): {
91
+ search(queryVector: Float32Array, k?: number, contentType?: 'text' | 'image' | 'combined'): Promise<{
91
92
  embeddingIds: string[];
92
93
  distances: number[];
93
- };
94
+ }>;
94
95
  /**
95
96
  * Get index statistics
96
97
  */
@@ -109,8 +110,27 @@ export declare class IndexManager {
109
110
  */
110
111
  private unhashEmbeddingId;
111
112
  /**
112
- * Close database connection
113
+ * Close database connection and cleanup vector index worker
113
114
  */
114
115
  close(): Promise<void>;
116
+ /**
117
+ * Reset the vector index by clearing all vectors while keeping the index structure.
118
+ * This is a safer alternative to file deletion that avoids file locking issues on Windows.
119
+ *
120
+ * The reset operation:
121
+ * 1. Clears in-memory HNSW index
122
+ * 2. Clears in-memory vector storage and ID mappings
123
+ * 3. Reinitializes an empty index with the same parameters
124
+ * 4. Saves the empty index to disk (overwrites existing file)
125
+ *
126
+ * @returns Promise that resolves when reset is complete
127
+ */
128
+ reset(): Promise<void>;
129
+ /**
130
+ * Check if the index has any vectors
131
+ * @returns true if the index contains vectors, false if empty
132
+ * Now async due to worker-based VectorIndex implementation
133
+ */
134
+ hasVectors(): Promise<boolean>;
115
135
  }
116
136
  //# sourceMappingURL=index-manager.d.ts.map