rag-lite-ts 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -433,7 +433,6 @@ Now Claude can search your docs directly! Works with any MCP-compatible AI tool.
433
433
  - **Content management** - Deduplication, cleanup
434
434
  - **Model compatibility** - Auto-detection, rebuilds
435
435
  - **Error recovery** - Clear messages, helpful hints
436
- - **Battle-tested** - Used in real applications
437
436
 
438
437
  </td>
439
438
  </tr>
@@ -15,8 +15,8 @@ export const DEFAULT_BATCH_CONFIG = {
15
15
  textBatchSize: 16,
16
16
  imageBatchSize: 4, // Smaller for memory-intensive image processing
17
17
  maxConcurrentBatches: 2,
18
- // Memory management (256MB threshold)
19
- memoryThresholdMB: 256,
18
+ // Memory management (512MB threshold for multimodal processing)
19
+ memoryThresholdMB: 512,
20
20
  enableMemoryMonitoring: true,
21
21
  enableGarbageCollection: true,
22
22
  // Progress reporting every 5 batches
@@ -402,13 +402,8 @@ export class BatchProcessingOptimizer {
402
402
  */
403
403
  async preloadImageProcessingModels() {
404
404
  try {
405
- if (!this.resourcePool.has('imageToText')) {
406
- console.log('Preloading image-to-text processor...');
407
- const processor = await LazyMultimodalLoader.loadImageToTextProcessor();
408
- this.resourcePool.set('imageToText', processor);
409
- // Register with resource manager
410
- this.resourceManager.registerImageProcessor(processor, 'image-to-text');
411
- }
405
+ // Note: Image-to-text processor is loaded on-demand by file-processor.ts
406
+ // to avoid conflicts with different pipeline configurations
412
407
  if (!this.resourcePool.has('metadataExtractor')) {
413
408
  console.log('Preloading image metadata extractor...');
414
409
  const extractor = await LazyMultimodalLoader.loadImageMetadataExtractor();
@@ -519,7 +514,7 @@ export function createImageBatchProcessor() {
519
514
  return new BatchProcessingOptimizer({
520
515
  imageBatchSize: 2, // Very small batches for memory efficiency
521
516
  textBatchSize: 8,
522
- memoryThresholdMB: 128, // Lower threshold for images
517
+ memoryThresholdMB: 512, // Higher threshold for memory-intensive image processing
523
518
  enableMemoryMonitoring: true,
524
519
  enableGarbageCollection: true,
525
520
  enableParallelProcessing: false, // Sequential for better memory control
@@ -534,7 +529,7 @@ export function createTextBatchProcessor() {
534
529
  textBatchSize: 32, // Larger batches for text
535
530
  imageBatchSize: 4,
536
531
  enableParallelProcessing: true, // Parallel processing for text
537
- memoryThresholdMB: 512, // Higher threshold for text
532
+ memoryThresholdMB: 256, // Lower threshold sufficient for text processing
538
533
  progressReportInterval: 10
539
534
  });
540
535
  }
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Binary Index Format Module
3
+ *
4
+ * Provides efficient binary serialization for HNSW vector indices.
5
+ *
6
+ * Format Specification:
7
+ * - Header: 24 bytes (6 × uint32)
8
+ * - Vectors: N × (4 + D × 4) bytes
9
+ * - Little-endian encoding for cross-platform compatibility
10
+ * - 4-byte alignment for Float32Array zero-copy views
11
+ *
12
+ * Performance:
13
+ * - 3.66x smaller than JSON format
14
+ * - 3.5x faster loading
15
+ * - Zero-copy Float32Array views
16
+ */
17
/**
 * Plain-object form of a vector index as it is written to, and read back
 * from, the binary index file: the six header fields plus the stored vectors.
 */
export interface BinaryIndexData {
    /** Number of float32 components in every stored vector (header field 1). */
    dimensions: number;
    /** `maxElements` value recorded in the header (header field 2). */
    maxElements: number;
    /** `M` value recorded in the header (header field 3). */
    M: number;
    /** `efConstruction` value recorded in the header (header field 4). */
    efConstruction: number;
    /** `seed` value recorded in the header (header field 5). */
    seed: number;
    /** Number of vector records recorded in the header (header field 6). */
    currentSize: number;
    /** Vector records: a numeric id paired with its float32 components. */
    vectors: {
        id: number;
        vector: Float32Array;
    }[];
}
29
/**
 * Reader/writer for the binary index file format.
 *
 * File structure:
 * - Header (24 bytes): dimensions, maxElements, M, efConstruction, seed, currentSize
 * - Vectors: for each vector, id (4 bytes) + vector data (dimensions × 4 bytes)
 */
export declare class BinaryIndexFormat {
    /**
     * Write index data to a file in binary format.
     *
     * @param indexPath Destination path of the binary index file
     * @param data Index data to serialize
     * @returns Promise that settles once the file has been written
     */
    static save(indexPath: string, data: BinaryIndexData): Promise<void>;
    /**
     * Read index data back from a binary index file.
     *
     * Each vector is returned as its own Float32Array copy, so the result
     * does not depend on the lifetime of the file buffer it was read from.
     *
     * @param indexPath Path to the binary index file
     * @returns Deserialized index data
     */
    static load(indexPath: string): Promise<BinaryIndexData>;
}
52
+ //# sourceMappingURL=binary-index-format.d.ts.map
@@ -0,0 +1,122 @@
1
+ /**
2
+ * Binary Index Format Module
3
+ *
4
+ * Provides efficient binary serialization for HNSW vector indices.
5
+ *
6
+ * Format Specification:
7
+ * - Header: 24 bytes (6 × uint32)
8
+ * - Vectors: N × (4 + D × 4) bytes
9
+ * - Little-endian encoding for cross-platform compatibility
10
+ * - 4-byte alignment for Float32Array zero-copy views
11
+ *
12
+ * Performance:
13
+ * - 3.66x smaller than JSON format
14
+ * - 3.5x faster loading
15
+ * - Zero-copy Float32Array views
16
+ */
17
+ import { readFileSync, writeFileSync } from 'fs';
18
export class BinaryIndexFormat {
    /**
     * Save index data to binary format.
     *
     * File structure (every value little-endian):
     * - Header (24 bytes): dimensions, maxElements, M, efConstruction, seed, currentSize
     * - Vectors: for each vector, id (4 bytes) + vector data (dimensions × 4 bytes)
     *
     * The vector count written to the header is `data.vectors.length`, so the
     * header always describes exactly the records that follow it, even when
     * `data.currentSize` has drifted from the array. Every vector is checked
     * against `data.dimensions` before anything is written, because a single
     * short or long vector would shift all later records in the fixed-stride
     * layout and corrupt the file.
     *
     * @param indexPath Path to save the binary index file
     * @param data Index data to serialize
     * @throws {Error} If any vector's length differs from `data.dimensions`
     */
    static async save(indexPath, data) {
        const headerSize = 24; // 6 uint32 fields
        const recordSize = 4 + data.dimensions * 4; // id + vector
        const vectorCount = data.vectors.length;
        // Validate up front so a bad vector never produces a partially valid file.
        for (const item of data.vectors) {
            if (item.vector.length !== data.dimensions) {
                throw new Error(`Vector ${item.id} has ${item.vector.length} dimensions, ` +
                    `expected ${data.dimensions}`);
            }
        }
        const buffer = new ArrayBuffer(headerSize + vectorCount * recordSize);
        const view = new DataView(buffer);
        let offset = 0;
        // Header: six consecutive little-endian uint32 values.
        const header = [
            data.dimensions,
            data.maxElements,
            data.M,
            data.efConstruction,
            data.seed,
            vectorCount
        ];
        for (const field of header) {
            view.setUint32(offset, field, true);
            offset += 4;
        }
        // Records: the offset stays 4-byte aligned by construction, since the
        // header and every field of every record are multiples of 4 bytes.
        for (const item of data.vectors) {
            view.setUint32(offset, item.id, true);
            offset += 4;
            for (let i = 0; i < data.dimensions; i++) {
                view.setFloat32(offset, item.vector[i], true);
                offset += 4;
            }
        }
        writeFileSync(indexPath, Buffer.from(buffer));
    }
    /**
     * Load index data from binary format.
     *
     * The file size is validated against the header before any record is
     * decoded, so a truncated file or a file in another format (for example
     * a JSON index) fails with a descriptive error instead of an opaque
     * RangeError. Each vector is decoded into its own Float32Array, so the
     * result does not alias the file buffer, does not depend on the buffer's
     * byte alignment, and is decoded as little-endian on any host.
     *
     * @param indexPath Path to the binary index file
     * @returns Deserialized index data
     * @throws {Error} If the file is shorter than its header or shorter than
     *   the size its header declares
     */
    static async load(indexPath) {
        const headerSize = 24; // 6 uint32 fields
        const buffer = readFileSync(indexPath);
        if (buffer.byteLength < headerSize) {
            throw new Error(`Index file ${indexPath} is truncated or not a binary index: ` +
                `${buffer.byteLength} bytes is smaller than the ${headerSize}-byte header`);
        }
        const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
        // Header: six consecutive little-endian uint32 values.
        const [dimensions, maxElements, M, efConstruction, seed, currentSize] = Array.from({ length: 6 }, (_, field) => view.getUint32(field * 4, true));
        const vectorBytes = dimensions * 4;
        const expectedSize = headerSize + currentSize * (4 + vectorBytes);
        if (buffer.byteLength < expectedSize) {
            throw new Error(`Index file ${indexPath} is truncated or not a binary index: ` +
                `header declares ${currentSize} vectors of ${dimensions} dimensions ` +
                `(${expectedSize} bytes) but the file has ${buffer.byteLength} bytes`);
        }
        // On little-endian hosts the stored bytes already match the in-memory
        // float32 layout, so a raw byte copy is enough. A byte copy (unlike a
        // Float32Array view over the file buffer) has no alignment requirement.
        const hostIsLittleEndian = new Uint8Array(new Uint32Array([1]).buffer)[0] === 1;
        const vectors = [];
        let offset = headerSize;
        for (let i = 0; i < currentSize; i++) {
            const id = view.getUint32(offset, true);
            offset += 4;
            const vector = new Float32Array(dimensions);
            if (hostIsLittleEndian) {
                new Uint8Array(vector.buffer).set(buffer.subarray(offset, offset + vectorBytes));
            }
            else {
                for (let j = 0; j < dimensions; j++) {
                    vector[j] = view.getFloat32(offset + j * 4, true);
                }
            }
            offset += vectorBytes;
            vectors.push({ id, vector });
        }
        return {
            dimensions,
            maxElements,
            M,
            efConstruction,
            seed,
            currentSize,
            vectors
        };
    }
}
122
+ //# sourceMappingURL=binary-index-format.js.map
@@ -290,7 +290,7 @@ export class IngestionPipeline {
290
290
  chunkSize: config.chunk_size,
291
291
  chunkOverlap: config.chunk_overlap
292
292
  };
293
- const chunkingResult = await this.chunkDocumentsWithContentTypes(fileResult.documents, effectiveChunkConfig);
293
+ const chunkingResult = await this.chunkDocumentsWithContentTypes(fileResult.documents, effectiveChunkConfig, options.mode);
294
294
  if (chunkingResult.totalChunks === 0) {
295
295
  console.log('No chunks created from documents');
296
296
  return {
@@ -364,7 +364,7 @@ export class IngestionPipeline {
364
364
  * Chunk all documents and organize results with content-type awareness
365
365
  * Enhanced to handle different content types appropriately
366
366
  */
367
- async chunkDocumentsWithContentTypes(documents, chunkConfig) {
367
+ async chunkDocumentsWithContentTypes(documents, chunkConfig, mode) {
368
368
  const documentChunks = [];
369
369
  const allChunks = [];
370
370
  let totalChunks = 0;
@@ -384,8 +384,18 @@ export class IngestionPipeline {
384
384
  metadata: document.metadata
385
385
  }];
386
386
  }
387
+ else if (mode === 'multimodal') {
388
+ // In multimodal mode, don't chunk text - CLIP handles truncation at 77 tokens
389
+ // Chunking doesn't make sense because CLIP can't handle long text anyway
390
+ chunks = [{
391
+ text: document.content,
392
+ chunkIndex: 0,
393
+ contentType: 'text',
394
+ metadata: document.metadata
395
+ }];
396
+ }
387
397
  else {
388
- // For text documents, use normal chunking
398
+ // For text mode, use normal chunking
389
399
  const textChunks = await chunkDocument(document, chunkConfig);
390
400
  chunks = textChunks.map(chunk => ({
391
401
  ...chunk,
@@ -69,7 +69,7 @@ export const SUPPORTED_MODELS = {
69
69
  supportsMetadata: true,
70
70
  supportsMultimodal: true, // True cross-modal search capabilities
71
71
  maxBatchSize: 8,
72
- maxTextLength: 77, // CLIP's text sequence length limit
72
+ maxTextLength: 77, // CLIP's token limit (tokenizer handles truncation)
73
73
  supportedImageFormats: ['jpg', 'jpeg', 'png', 'webp', 'gif']
74
74
  },
75
75
  requirements: {
@@ -92,7 +92,7 @@ export const SUPPORTED_MODELS = {
92
92
  supportsMetadata: true,
93
93
  supportsMultimodal: true, // True cross-modal search capabilities
94
94
  maxBatchSize: 4,
95
- maxTextLength: 77, // CLIP's text sequence length limit
95
+ maxTextLength: 77, // CLIP's token limit (tokenizer handles truncation)
96
96
  supportedImageFormats: ['jpg', 'jpeg', 'png', 'webp', 'gif']
97
97
  },
98
98
  requirements: {
@@ -194,9 +194,9 @@ export class ModelRegistry {
194
194
  suggestions.push('Use smaller batch sizes for optimal performance');
195
195
  }
196
196
  // Text length limitations
197
- if (modelInfo.capabilities.maxTextLength && modelInfo.capabilities.maxTextLength < 512) {
197
+ if (modelInfo.capabilities.maxTextLength && modelInfo.capabilities.maxTextLength < 256) {
198
198
  warnings.push(`Model has limited text length: ${modelInfo.capabilities.maxTextLength} characters`);
199
- suggestions.push('Consider chunking long texts before processing');
199
+ suggestions.push('Long texts will be truncated by the tokenizer');
200
200
  }
201
201
  // Image format support
202
202
  if (modelInfo.capabilities.supportsImages && modelInfo.capabilities.supportedImageFormats) {
@@ -97,20 +97,10 @@ export declare class TextDerivedRerankingStrategy implements RerankingStrategy {
97
97
  readonly supportedContentTypes: string[];
98
98
  isEnabled: boolean;
99
99
  private crossEncoderReranker;
100
- private imageToTextModel;
101
- private imageToTextModelName;
102
- private initialized;
103
100
  constructor(imageToTextModelName?: string, crossEncoderModelName?: string);
104
- /**
105
- * Initialize the image-to-text model if not already done
106
- */
107
- private ensureInitialized;
108
- /**
109
- * Ensure DOM polyfills are set up for transformers.js
110
- */
111
- private ensurePolyfills;
112
101
  /**
113
102
  * Generate text description for an image
103
+ * Uses the shared image-to-text functionality from file-processor
114
104
  */
115
105
  private generateImageDescription;
116
106
  /**
@@ -128,11 +118,6 @@ export declare class TextDerivedRerankingStrategy implements RerankingStrategy {
128
118
  description: string;
129
119
  requiredModels: string[];
130
120
  configOptions: {
131
- imageToTextModel: {
132
- type: string;
133
- description: string;
134
- default: string;
135
- };
136
121
  crossEncoderModel: {
137
122
  type: string;
138
123
  description: string;
@@ -174,69 +174,22 @@ export class TextDerivedRerankingStrategy {
174
174
  supportedContentTypes = ['text', 'image'];
175
175
  isEnabled = true;
176
176
  crossEncoderReranker;
177
- imageToTextModel = null;
178
- imageToTextModelName = 'Xenova/vit-gpt2-image-captioning';
179
- initialized = false;
180
177
  constructor(imageToTextModelName, crossEncoderModelName) {
181
- if (imageToTextModelName) {
182
- this.imageToTextModelName = imageToTextModelName;
183
- }
178
+ // Note: imageToTextModelName parameter is kept for backward compatibility
179
+ // but is no longer used since we delegate to file-processor's implementation
184
180
  // Create the underlying cross-encoder strategy
185
181
  this.crossEncoderReranker = new CrossEncoderRerankingStrategy(crossEncoderModelName);
186
182
  }
187
- /**
188
- * Initialize the image-to-text model if not already done
189
- */
190
- async ensureInitialized() {
191
- if (!this.initialized) {
192
- try {
193
- console.log(`Loading image-to-text model: ${this.imageToTextModelName}`);
194
- // Set up polyfills for transformers.js
195
- this.ensurePolyfills();
196
- const { pipeline } = await import('@huggingface/transformers');
197
- this.imageToTextModel = await pipeline('image-to-text', this.imageToTextModelName);
198
- this.initialized = true;
199
- console.log(`Image-to-text model loaded successfully: ${this.imageToTextModelName}`);
200
- }
201
- catch (error) {
202
- console.warn(`Image-to-text model initialization failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
203
- this.isEnabled = false;
204
- }
205
- }
206
- }
207
- /**
208
- * Ensure DOM polyfills are set up for transformers.js
209
- */
210
- ensurePolyfills() {
211
- if (typeof window === 'undefined' && typeof globalThis !== 'undefined') {
212
- if (typeof globalThis.self === 'undefined') {
213
- globalThis.self = globalThis;
214
- }
215
- if (typeof global.self === 'undefined') {
216
- global.self = global;
217
- }
218
- }
219
- }
220
183
  /**
221
184
  * Generate text description for an image
185
+ * Uses the shared image-to-text functionality from file-processor
222
186
  */
223
187
  async generateImageDescription(imagePath) {
224
- await this.ensureInitialized();
225
- if (!this.imageToTextModel) {
226
- throw new Error('Image-to-text model not loaded');
227
- }
228
188
  try {
229
- const result = await this.imageToTextModel(imagePath);
230
- // Handle different response formats from the pipeline
231
- if (Array.isArray(result) && result.length > 0) {
232
- return result[0].generated_text || result[0].text || String(result[0]);
233
- }
234
- else if (result && typeof result === 'object') {
235
- return result.generated_text || result.text || String(result);
236
- }
237
- else {
238
- return String(result);
239
- }
189
+ // Use the file-processor's image description function which has proven to work reliably
190
+ const { generateImageDescriptionForFile } = await import('../file-processor.js');
191
+ const result = await generateImageDescriptionForFile(imagePath);
192
+ return result.description;
240
193
  }
241
194
  catch (error) {
242
195
  console.warn(`Failed to generate description for image ${imagePath}: ${error instanceof Error ? error.message : 'Unknown error'}`);
@@ -249,22 +202,11 @@ export class TextDerivedRerankingStrategy {
249
202
  * Rerank search results using text-derived approach
250
203
  */
251
204
  rerank = async (query, results, contentType) => {
252
- // If strategy is disabled, return results unchanged
253
- if (!this.isEnabled) {
254
- return results;
255
- }
256
205
  // Validate content type
257
206
  if (contentType && !this.supportedContentTypes.includes(contentType)) {
258
207
  throw new Error(`Text-derived strategy does not support content type '${contentType}'. ` +
259
208
  `Supported types: ${this.supportedContentTypes.join(', ')}`);
260
209
  }
261
- // Ensure models are initialized
262
- await this.ensureInitialized();
263
- // If initialization failed, return results unchanged
264
- if (!this.isEnabled) {
265
- console.warn('Text-derived reranker not enabled, returning results unchanged');
266
- return results;
267
- }
268
210
  try {
269
211
  // Step 1: Convert images to text descriptions
270
212
  const processedResults = await Promise.all(results.map(async (result) => {
@@ -314,12 +256,8 @@ export class TextDerivedRerankingStrategy {
314
256
  * Configure the reranking strategy
315
257
  */
316
258
  configure(config) {
317
- if (config.imageToTextModel && typeof config.imageToTextModel === 'string') {
318
- this.imageToTextModelName = config.imageToTextModel;
319
- // Reset initialization to use new model
320
- this.initialized = false;
321
- this.imageToTextModel = null;
322
- }
259
+ // Note: imageToTextModel configuration is no longer used
260
+ // since we delegate to file-processor's implementation
323
261
  if (config.crossEncoderModel && typeof config.crossEncoderModel === 'string') {
324
262
  this.crossEncoderReranker.configure({ modelName: config.crossEncoderModel });
325
263
  }
@@ -334,15 +272,10 @@ export class TextDerivedRerankingStrategy {
334
272
  return {
335
273
  description: 'Text-derived reranking that converts images to text descriptions then applies cross-encoder reranking',
336
274
  requiredModels: [
337
- 'Xenova/vit-gpt2-image-captioning', // Image-to-text model
275
+ 'Xenova/vit-gpt2-image-captioning', // Image-to-text model (via file-processor)
338
276
  'Xenova/ms-marco-MiniLM-L-6-v2' // Cross-encoder model
339
277
  ],
340
278
  configOptions: {
341
- imageToTextModel: {
342
- type: 'string',
343
- description: 'Image-to-text model name for generating descriptions',
344
- default: 'Xenova/vit-gpt2-image-captioning'
345
- },
346
279
  crossEncoderModel: {
347
280
  type: 'string',
348
281
  description: 'Cross-encoder model name for text reranking',
@@ -360,16 +293,15 @@ export class TextDerivedRerankingStrategy {
360
293
  * Check if the strategy is ready to use
361
294
  */
362
295
  async isReady() {
363
- await this.ensureInitialized();
364
296
  const crossEncoderReady = await this.crossEncoderReranker.isReady();
365
- return this.isEnabled && this.imageToTextModel !== null && crossEncoderReady;
297
+ return this.isEnabled && crossEncoderReady;
366
298
  }
367
299
  /**
368
300
  * Get the current model names being used
369
301
  */
370
302
  getModelNames() {
371
303
  return {
372
- imageToText: this.imageToTextModelName,
304
+ imageToText: 'Xenova/vit-gpt2-image-captioning', // Fixed model via file-processor
373
305
  crossEncoder: this.crossEncoderReranker.getModelName()
374
306
  };
375
307
  }
@@ -377,8 +309,6 @@ export class TextDerivedRerankingStrategy {
377
309
  * Clean up resources
378
310
  */
379
311
  async cleanup() {
380
- this.initialized = false;
381
- this.imageToTextModel = null;
382
312
  await this.crossEncoderReranker.cleanup();
383
313
  }
384
314
  }
@@ -30,7 +30,7 @@ export declare class VectorIndex {
30
30
  */
31
31
  loadIndex(): Promise<void>;
32
32
  /**
33
- * Save index to file using JSON format (since IDBFS doesn't work in Node.js)
33
+ * Save index to binary format
34
34
  */
35
35
  saveIndex(): Promise<void>;
36
36
  /**
@@ -2,10 +2,11 @@
2
2
  * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
3
  * Model-agnostic. No transformer or modality-specific logic.
4
4
  */
5
- import { readFileSync, writeFileSync, existsSync } from 'fs';
5
+ import { existsSync } from 'fs';
6
6
  import { JSDOM } from 'jsdom';
7
7
  import { ErrorCategory, ErrorSeverity, safeExecute } from './error-handler.js';
8
8
  import { createMissingFileError, createDimensionMismatchError } from './actionable-error-messages.js';
9
+ import { BinaryIndexFormat } from './binary-index-format.js';
9
10
  // Set up browser-like environment for hnswlib-wasm
10
11
  if (typeof window === 'undefined') {
11
12
  const dom = new JSDOM('<!DOCTYPE html><html><body></body></html>', {
@@ -153,66 +154,64 @@ export class VectorIndex {
153
154
  }
154
155
  // Create new HNSW index (third parameter is autoSaveFilename, but we'll handle persistence manually)
155
156
  this.index = new this.hnswlib.HierarchicalNSW('cosine', this.options.dimensions, '');
156
- // Load from JSON format since IDBFS doesn't work in Node.js
157
- const data = readFileSync(this.indexPath, 'utf-8');
158
- const stored = JSON.parse(data);
159
- // Check dimension compatibility and log details
160
- if (stored.dimensions && stored.dimensions !== this.options.dimensions) {
157
+ // Load from binary format
158
+ const data = await BinaryIndexFormat.load(this.indexPath);
159
+ // Validate dimensions
160
+ if (data.dimensions !== this.options.dimensions) {
161
161
  console.log(`⚠️ Dimension mismatch detected:`);
162
- console.log(` Stored dimensions: ${stored.dimensions}`);
162
+ console.log(` Stored dimensions: ${data.dimensions}`);
163
163
  console.log(` Expected dimensions: ${this.options.dimensions}`);
164
- console.log(` Number of vectors: ${stored.vectors?.length || 0}`);
165
- if (stored.vectors && stored.vectors.length > 0) {
166
- console.log(` Actual vector length: ${stored.vectors[0].vector.length}`);
164
+ console.log(` Number of vectors: ${data.vectors.length}`);
165
+ if (data.vectors.length > 0) {
166
+ console.log(` Actual vector length: ${data.vectors[0].vector.length}`);
167
167
  }
168
- throw createDimensionMismatchError(this.options.dimensions, stored.dimensions, 'vector index loading', { operationContext: 'VectorIndex.loadIndex' });
168
+ throw createDimensionMismatchError(this.options.dimensions, data.dimensions, 'vector index loading', { operationContext: 'VectorIndex.loadIndex' });
169
169
  }
170
170
  // Update options from stored data
171
- this.options.maxElements = stored.maxElements || this.options.maxElements;
172
- this.options.M = stored.M || this.options.M;
173
- this.options.efConstruction = stored.efConstruction || this.options.efConstruction;
174
- this.options.seed = stored.seed || this.options.seed;
175
- // Recreate the index from stored data
176
- this.index.initIndex(this.options.maxElements, this.options.M || 16, this.options.efConstruction || 200, this.options.seed || 100);
171
+ this.options.maxElements = data.maxElements;
172
+ this.options.M = data.M;
173
+ this.options.efConstruction = data.efConstruction;
174
+ this.options.seed = data.seed;
175
+ // Initialize HNSW index
176
+ this.index.initIndex(this.options.maxElements, this.options.M, this.options.efConstruction, this.options.seed);
177
177
  // Clear and repopulate vector storage
178
178
  this.vectorStorage.clear();
179
- // Add all stored vectors back
180
- for (const item of stored.vectors || []) {
181
- const vector = new Float32Array(item.vector);
182
- this.index.addPoint(vector, item.id, false);
183
- this.vectorStorage.set(item.id, vector);
179
+ // Add all stored vectors to HNSW index
180
+ for (const item of data.vectors) {
181
+ this.index.addPoint(item.vector, item.id, false);
182
+ this.vectorStorage.set(item.id, item.vector);
184
183
  }
185
- this.currentSize = stored.vectors?.length || 0;
186
- console.log(`Loaded HNSW index with ${this.currentSize} vectors from ${this.indexPath}`);
184
+ this.currentSize = data.currentSize;
185
+ console.log(`✓ Loaded HNSW index with ${this.currentSize} vectors from ${this.indexPath}`);
187
186
  }
188
187
  catch (error) {
189
188
  throw new Error(`Failed to load index from ${this.indexPath}: ${error}`);
190
189
  }
191
190
  }
192
191
  /**
193
- * Save index to file using JSON format (since IDBFS doesn't work in Node.js)
192
+ * Save index to binary format
194
193
  */
195
194
  async saveIndex() {
196
195
  if (!this.index) {
197
196
  throw new Error('Index not initialized');
198
197
  }
199
198
  try {
200
- // Convert stored vectors to serializable format
199
+ // Collect all vectors from storage
201
200
  const vectors = Array.from(this.vectorStorage.entries()).map(([id, vector]) => ({
202
201
  id,
203
- vector: Array.from(vector)
202
+ vector
204
203
  }));
205
- const stored = {
204
+ // Save to binary format
205
+ await BinaryIndexFormat.save(this.indexPath, {
206
206
  dimensions: this.options.dimensions,
207
207
  maxElements: this.options.maxElements,
208
208
  M: this.options.M || 16,
209
209
  efConstruction: this.options.efConstruction || 200,
210
210
  seed: this.options.seed || 100,
211
211
  currentSize: this.currentSize,
212
- vectors: vectors
213
- };
214
- writeFileSync(this.indexPath, JSON.stringify(stored, null, 2));
215
- console.log(`Saved HNSW index with ${this.currentSize} vectors to ${this.indexPath}`);
212
+ vectors
213
+ });
214
+ console.log(`✓ Saved HNSW index with ${this.currentSize} vectors to ${this.indexPath}`);
216
215
  }
217
216
  catch (error) {
218
217
  throw new Error(`Failed to save index to ${this.indexPath}: ${error}`);
@@ -30,11 +30,8 @@ if (typeof window === 'undefined') {
30
30
  if (typeof globalThis.navigator === 'undefined') {
31
31
  globalThis.navigator = dom.window.navigator;
32
32
  }
33
- // Polyfill createImageBitmap if needed (for image processing)
34
- if (typeof globalThis.createImageBitmap === 'undefined') {
35
- globalThis.createImageBitmap = dom.window.createImageBitmap || (() => {
36
- throw new Error('createImageBitmap not available in Node.js environment');
37
- });
38
- }
33
+ // Note: Do NOT polyfill createImageBitmap with a fake implementation
34
+ // RawImage.fromURL() will handle image loading correctly without it
35
+ // Setting a fake createImageBitmap that throws errors breaks image loading
39
36
  }
40
37
  //# sourceMappingURL=dom-polyfills.js.map
@@ -36,6 +36,8 @@
36
36
  * ```
37
37
  */
38
38
  export { TextSearchFactory, TextIngestionFactory, TextRAGFactory, TextFactoryHelpers } from './text-factory.js';
39
+ export { PolymorphicSearchFactory } from './polymorphic-factory.js';
40
+ export type { PolymorphicSearchOptions } from './polymorphic-factory.js';
39
41
  export type { TextSearchOptions, TextIngestionOptions, ContentSystemConfig } from './text-factory.js';
40
42
  export { TextSearchFactory as SearchFactory } from './text-factory.js';
41
43
  export { TextIngestionFactory as IngestionFactory } from './text-factory.js';
@@ -37,6 +37,8 @@
37
37
  */
38
38
  // Main factory classes
39
39
  export { TextSearchFactory, TextIngestionFactory, TextRAGFactory, TextFactoryHelpers } from './text-factory.js';
40
+ // Polymorphic factory for mode-aware search
41
+ export { PolymorphicSearchFactory } from './polymorphic-factory.js';
40
42
  // Convenience re-exports for common patterns
41
43
  export { TextSearchFactory as SearchFactory } from './text-factory.js';
42
44
  export { TextIngestionFactory as IngestionFactory } from './text-factory.js';