npm - rag-lite-ts - Versions diffs - 2.0.0 → 2.0.2 - Mend

rag-lite-ts 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/README.md +0 -1
package/dist/core/batch-processing-optimizer.js +6 -11
package/dist/core/binary-index-format.d.ts +52 -0
package/dist/core/binary-index-format.js +122 -0
package/dist/core/ingestion.js +13 -3
package/dist/core/model-registry.js +4 -4
package/dist/core/reranking-strategies.d.ts +1 -16
package/dist/core/reranking-strategies.js +12 -82
package/dist/core/vector-index.d.ts +1 -1
package/dist/core/vector-index.js +31 -32
package/dist/dom-polyfills.js +3 -6
package/dist/factories/index.d.ts +2 -0
package/dist/factories/index.js +2 -0
package/dist/factories/polymorphic-factory.d.ts +50 -0
package/dist/factories/polymorphic-factory.js +159 -0
package/dist/file-processor.js +30 -102
package/dist/index.d.ts +23 -0
package/dist/index.js +18 -0
package/dist/ingestion.js +18 -3
package/dist/multimodal/clip-embedder.d.ts +18 -5
package/dist/multimodal/clip-embedder.js +73 -26
package/dist/search.d.ts +34 -9
package/dist/search.js +28 -10
package/package.json +13 -4

package/README.md CHANGED Viewed

@@ -433,7 +433,6 @@ Now Claude can search your docs directly! Works with any MCP-compatible AI tool.
 - **Content management** - Deduplication, cleanup
 - **Model compatibility** - Auto-detection, rebuilds
 - **Error recovery** - Clear messages, helpful hints
-- **Battle-tested** - Used in real applications
 </td>
 </tr>

package/dist/core/batch-processing-optimizer.js CHANGED Viewed

@@ -15,8 +15,8 @@ export const DEFAULT_BATCH_CONFIG = {
     textBatchSize: 16,
     imageBatchSize: 4, // Smaller for memory-intensive image processing
     maxConcurrentBatches: 2,
-    // Memory management (256MB threshold)
-    memoryThresholdMB: 256,
+    // Memory management (512MB threshold for multimodal processing)
+    memoryThresholdMB: 512,
     enableMemoryMonitoring: true,
     enableGarbageCollection: true,
     // Progress reporting every 5 batches
@@ -402,13 +402,8 @@ export class BatchProcessingOptimizer {
      */
     async preloadImageProcessingModels() {
         try {
-            if (!this.resourcePool.has('imageToText')) {
-                console.log('Preloading image-to-text processor...');
-                const processor = await LazyMultimodalLoader.loadImageToTextProcessor();
-                this.resourcePool.set('imageToText', processor);
-                // Register with resource manager
-                this.resourceManager.registerImageProcessor(processor, 'image-to-text');
-            }
+            // Note: Image-to-text processor is loaded on-demand by file-processor.ts
+            // to avoid conflicts with different pipeline configurations
             if (!this.resourcePool.has('metadataExtractor')) {
                 console.log('Preloading image metadata extractor...');
                 const extractor = await LazyMultimodalLoader.loadImageMetadataExtractor();
@@ -519,7 +514,7 @@ export function createImageBatchProcessor() {
     return new BatchProcessingOptimizer({
         imageBatchSize: 2, // Very small batches for memory efficiency
         textBatchSize: 8,
-        memoryThresholdMB: 128, // Lower threshold for images
+        memoryThresholdMB: 512, // Higher threshold for memory-intensive image processing
         enableMemoryMonitoring: true,
         enableGarbageCollection: true,
         enableParallelProcessing: false, // Sequential for better memory control
@@ -534,7 +529,7 @@ export function createTextBatchProcessor() {
         textBatchSize: 32, // Larger batches for text
         imageBatchSize: 4,
         enableParallelProcessing: true, // Parallel processing for text
-        memoryThresholdMB: 512, // Higher threshold for text
+        memoryThresholdMB: 256, // Lower threshold sufficient for text processing
         progressReportInterval: 10
     });
 }

package/dist/core/binary-index-format.d.ts ADDED Viewed

@@ -0,0 +1,52 @@
+/**
+ * Binary Index Format Module
+ *
+ * Provides efficient binary serialization for HNSW vector indices.
+ *
+ * Format Specification:
+ * - Header: 24 bytes (6 × uint32)
+ * - Vectors: N × (4 + D × 4) bytes
+ * - Little-endian encoding for cross-platform compatibility
+ * - 4-byte alignment for Float32Array zero-copy views
+ *
+ * Performance:
+ * - 3.66x smaller than JSON format
+ * - 3.5x faster loading
+ * - Zero-copy Float32Array views
+ */
+export interface BinaryIndexData {
+    dimensions: number;
+    maxElements: number;
+    M: number;
+    efConstruction: number;
+    seed: number;
+    currentSize: number;
+    vectors: Array<{
+        id: number;
+        vector: Float32Array;
+    }>;
+}
+export declare class BinaryIndexFormat {
+    /**
+     * Save index data to binary format
+     *
+     * File structure:
+     * - Header (24 bytes): dimensions, maxElements, M, efConstruction, seed, currentSize
+     * - Vectors: For each vector: id (4 bytes) + vector data (dimensions × 4 bytes)
+     *
+     * @param indexPath Path to save the binary index file
+     * @param data Index data to serialize
+     */
+    static save(indexPath: string, data: BinaryIndexData): Promise<void>;
+    /**
+     * Load index data from binary format
+     *
+     * Uses zero-copy Float32Array views for efficient loading.
+     * Copies the views to ensure data persistence after buffer lifecycle.
+     *
+     * @param indexPath Path to the binary index file
+     * @returns Deserialized index data
+     */
+    static load(indexPath: string): Promise<BinaryIndexData>;
+}
+//# sourceMappingURL=binary-index-format.d.ts.map

package/dist/core/binary-index-format.js ADDED Viewed

@@ -0,0 +1,122 @@
+/**
+ * Binary Index Format Module
+ *
+ * Provides efficient binary serialization for HNSW vector indices.
+ *
+ * Format Specification:
+ * - Header: 24 bytes (6 × uint32)
+ * - Vectors: N × (4 + D × 4) bytes
+ * - Little-endian encoding for cross-platform compatibility
+ * - 4-byte alignment for Float32Array zero-copy views
+ *
+ * Performance:
+ * - 3.66x smaller than JSON format
+ * - 3.5x faster loading
+ * - Zero-copy Float32Array views
+ */
+import { readFileSync, writeFileSync } from 'fs';
+export class BinaryIndexFormat {
+    /**
+     * Save index data to binary format
+     *
+     * File structure:
+     * - Header (24 bytes): dimensions, maxElements, M, efConstruction, seed, currentSize
+     * - Vectors: For each vector: id (4 bytes) + vector data (dimensions × 4 bytes)
+     *
+     * @param indexPath Path to save the binary index file
+     * @param data Index data to serialize
+     */
+    static async save(indexPath, data) {
+        // Calculate total size
+        const headerSize = 24; // 6 uint32 fields
+        const vectorSize = 4 + (data.dimensions * 4); // id + vector
+        const totalSize = headerSize + (data.currentSize * vectorSize);
+        const buffer = new ArrayBuffer(totalSize);
+        const view = new DataView(buffer);
+        let offset = 0;
+        // Write header (24 bytes, all little-endian)
+        view.setUint32(offset, data.dimensions, true);
+        offset += 4;
+        view.setUint32(offset, data.maxElements, true);
+        offset += 4;
+        view.setUint32(offset, data.M, true);
+        offset += 4;
+        view.setUint32(offset, data.efConstruction, true);
+        offset += 4;
+        view.setUint32(offset, data.seed, true);
+        offset += 4;
+        view.setUint32(offset, data.currentSize, true);
+        offset += 4;
+        // Write vectors
+        for (const item of data.vectors) {
+            // Ensure 4-byte alignment (should always be true with our format)
+            if (offset % 4 !== 0) {
+                throw new Error(`Offset ${offset} is not 4-byte aligned`);
+            }
+            // Write vector ID
+            view.setUint32(offset, item.id, true);
+            offset += 4;
+            // Write vector data
+            for (let i = 0; i < item.vector.length; i++) {
+                view.setFloat32(offset, item.vector[i], true);
+                offset += 4;
+            }
+        }
+        // Write to file
+        writeFileSync(indexPath, Buffer.from(buffer));
+    }
+    /**
+     * Load index data from binary format
+     *
+     * Uses zero-copy Float32Array views for efficient loading.
+     * Copies the views to ensure data persistence after buffer lifecycle.
+     *
+     * @param indexPath Path to the binary index file
+     * @returns Deserialized index data
+     */
+    static async load(indexPath) {
+        const buffer = readFileSync(indexPath);
+        const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
+        let offset = 0;
+        // Read header (24 bytes, all little-endian)
+        const dimensions = view.getUint32(offset, true);
+        offset += 4;
+        const maxElements = view.getUint32(offset, true);
+        offset += 4;
+        const M = view.getUint32(offset, true);
+        offset += 4;
+        const efConstruction = view.getUint32(offset, true);
+        offset += 4;
+        const seed = view.getUint32(offset, true);
+        offset += 4;
+        const currentSize = view.getUint32(offset, true);
+        offset += 4;
+        // Read vectors
+        const vectors = [];
+        for (let i = 0; i < currentSize; i++) {
+            // Ensure 4-byte alignment (should always be true with our format)
+            if (offset % 4 !== 0) {
+                throw new Error(`Offset ${offset} is not 4-byte aligned`);
+            }
+            // Read vector ID
+            const id = view.getUint32(offset, true);
+            offset += 4;
+            // Zero-copy Float32Array view (fast!)
+            const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
+            // Copy to avoid buffer lifecycle issues
+            const vector = new Float32Array(vectorView);
+            offset += dimensions * 4;
+            vectors.push({ id, vector });
+        }
+        return {
+            dimensions,
+            maxElements,
+            M,
+            efConstruction,
+            seed,
+            currentSize,
+            vectors
+        };
+    }
+}
+//# sourceMappingURL=binary-index-format.js.map

package/dist/core/ingestion.js CHANGED Viewed

@@ -290,7 +290,7 @@ export class IngestionPipeline {
                 chunkSize: config.chunk_size,
                 chunkOverlap: config.chunk_overlap
             };
-            const chunkingResult = await this.chunkDocumentsWithContentTypes(fileResult.documents, effectiveChunkConfig);
+            const chunkingResult = await this.chunkDocumentsWithContentTypes(fileResult.documents, effectiveChunkConfig, options.mode);
             if (chunkingResult.totalChunks === 0) {
                 console.log('No chunks created from documents');
                 return {
@@ -364,7 +364,7 @@ export class IngestionPipeline {
      * Chunk all documents and organize results with content-type awareness
      * Enhanced to handle different content types appropriately
      */
-    async chunkDocumentsWithContentTypes(documents, chunkConfig) {
+    async chunkDocumentsWithContentTypes(documents, chunkConfig, mode) {
         const documentChunks = [];
         const allChunks = [];
         let totalChunks = 0;
@@ -384,8 +384,18 @@ export class IngestionPipeline {
                             metadata: document.metadata
                         }];
                 }
+                else if (mode === 'multimodal') {
+                    // In multimodal mode, don't chunk text - CLIP handles truncation at 77 tokens
+                    // Chunking doesn't make sense because CLIP can't handle long text anyway
+                    chunks = [{
+                            text: document.content,
+                            chunkIndex: 0,
+                            contentType: 'text',
+                            metadata: document.metadata
+                        }];
+                }
                 else {
-                    // For text documents, use normal chunking
+                    // For text mode, use normal chunking
                     const textChunks = await chunkDocument(document, chunkConfig);
                     chunks = textChunks.map(chunk => ({
                         ...chunk,

package/dist/core/model-registry.js CHANGED Viewed

@@ -69,7 +69,7 @@ export const SUPPORTED_MODELS = {
             supportsMetadata: true,
             supportsMultimodal: true, // True cross-modal search capabilities
             maxBatchSize: 8,
-            maxTextLength: 77, // CLIP's text sequence length limit
+            maxTextLength: 77, // CLIP's token limit (tokenizer handles truncation)
             supportedImageFormats: ['jpg', 'jpeg', 'png', 'webp', 'gif']
         },
         requirements: {
@@ -92,7 +92,7 @@ export const SUPPORTED_MODELS = {
             supportsMetadata: true,
             supportsMultimodal: true, // True cross-modal search capabilities
             maxBatchSize: 4,
-            maxTextLength: 77, // CLIP's text sequence length limit
+            maxTextLength: 77, // CLIP's token limit (tokenizer handles truncation)
             supportedImageFormats: ['jpg', 'jpeg', 'png', 'webp', 'gif']
         },
         requirements: {
@@ -194,9 +194,9 @@ export class ModelRegistry {
             suggestions.push('Use smaller batch sizes for optimal performance');
         }
         // Text length limitations
-        if (modelInfo.capabilities.maxTextLength && modelInfo.capabilities.maxTextLength < 512) {
+        if (modelInfo.capabilities.maxTextLength && modelInfo.capabilities.maxTextLength < 256) {
             warnings.push(`Model has limited text length: ${modelInfo.capabilities.maxTextLength} characters`);
-            suggestions.push('Consider chunking long texts before processing');
+            suggestions.push('Long texts will be truncated by the tokenizer');
         }
         // Image format support
         if (modelInfo.capabilities.supportsImages && modelInfo.capabilities.supportedImageFormats) {

package/dist/core/reranking-strategies.d.ts CHANGED Viewed

@@ -97,20 +97,10 @@ export declare class TextDerivedRerankingStrategy implements RerankingStrategy {
     readonly supportedContentTypes: string[];
     isEnabled: boolean;
     private crossEncoderReranker;
-    private imageToTextModel;
-    private imageToTextModelName;
-    private initialized;
     constructor(imageToTextModelName?: string, crossEncoderModelName?: string);
-    /**
-     * Initialize the image-to-text model if not already done
-     */
-    private ensureInitialized;
-    /**
-     * Ensure DOM polyfills are set up for transformers.js
-     */
-    private ensurePolyfills;
     /**
      * Generate text description for an image
+     * Uses the shared image-to-text functionality from file-processor
      */
     private generateImageDescription;
     /**
@@ -128,11 +118,6 @@ export declare class TextDerivedRerankingStrategy implements RerankingStrategy {
         description: string;
         requiredModels: string[];
         configOptions: {
-            imageToTextModel: {
-                type: string;
-                description: string;
-                default: string;
-            };
             crossEncoderModel: {
                 type: string;
                 description: string;

package/dist/core/reranking-strategies.js CHANGED Viewed

@@ -174,69 +174,22 @@ export class TextDerivedRerankingStrategy {
     supportedContentTypes = ['text', 'image'];
     isEnabled = true;
     crossEncoderReranker;
-    imageToTextModel = null;
-    imageToTextModelName = 'Xenova/vit-gpt2-image-captioning';
-    initialized = false;
     constructor(imageToTextModelName, crossEncoderModelName) {
-        if (imageToTextModelName) {
-            this.imageToTextModelName = imageToTextModelName;
-        }
+        // Note: imageToTextModelName parameter is kept for backward compatibility
+        // but is no longer used since we delegate to file-processor's implementation
         // Create the underlying cross-encoder strategy
         this.crossEncoderReranker = new CrossEncoderRerankingStrategy(crossEncoderModelName);
     }
-    /**
-     * Initialize the image-to-text model if not already done
-     */
-    async ensureInitialized() {
-        if (!this.initialized) {
-            try {
-                console.log(`Loading image-to-text model: ${this.imageToTextModelName}`);
-                // Set up polyfills for transformers.js
-                this.ensurePolyfills();
-                const { pipeline } = await import('@huggingface/transformers');
-                this.imageToTextModel = await pipeline('image-to-text', this.imageToTextModelName);
-                this.initialized = true;
-                console.log(`Image-to-text model loaded successfully: ${this.imageToTextModelName}`);
-            }
-            catch (error) {
-                console.warn(`Image-to-text model initialization failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
-                this.isEnabled = false;
-            }
-        }
-    }
-    /**
-     * Ensure DOM polyfills are set up for transformers.js
-     */
-    ensurePolyfills() {
-        if (typeof window === 'undefined' && typeof globalThis !== 'undefined') {
-            if (typeof globalThis.self === 'undefined') {
-                globalThis.self = globalThis;
-            }
-            if (typeof global.self === 'undefined') {
-                global.self = global;
-            }
-        }
-    }
     /**
      * Generate text description for an image
+     * Uses the shared image-to-text functionality from file-processor
      */
     async generateImageDescription(imagePath) {
-        await this.ensureInitialized();
-        if (!this.imageToTextModel) {
-            throw new Error('Image-to-text model not loaded');
-        }
         try {
-            const result = await this.imageToTextModel(imagePath);
-            // Handle different response formats from the pipeline
-            if (Array.isArray(result) && result.length > 0) {
-                return result[0].generated_text || result[0].text || String(result[0]);
-            }
-            else if (result && typeof result === 'object') {
-                return result.generated_text || result.text || String(result);
-            }
-            else {
-                return String(result);
-            }
+            // Use the file-processor's image description function which has proven to work reliably
+            const { generateImageDescriptionForFile } = await import('../file-processor.js');
+            const result = await generateImageDescriptionForFile(imagePath);
+            return result.description;
         }
         catch (error) {
             console.warn(`Failed to generate description for image ${imagePath}: ${error instanceof Error ? error.message : 'Unknown error'}`);
@@ -249,22 +202,11 @@ export class TextDerivedRerankingStrategy {
      * Rerank search results using text-derived approach
      */
     rerank = async (query, results, contentType) => {
-        // If strategy is disabled, return results unchanged
-        if (!this.isEnabled) {
-            return results;
-        }
         // Validate content type
         if (contentType && !this.supportedContentTypes.includes(contentType)) {
             throw new Error(`Text-derived strategy does not support content type '${contentType}'. ` +
                 `Supported types: ${this.supportedContentTypes.join(', ')}`);
         }
-        // Ensure models are initialized
-        await this.ensureInitialized();
-        // If initialization failed, return results unchanged
-        if (!this.isEnabled) {
-            console.warn('Text-derived reranker not enabled, returning results unchanged');
-            return results;
-        }
         try {
             // Step 1: Convert images to text descriptions
             const processedResults = await Promise.all(results.map(async (result) => {
@@ -314,12 +256,8 @@ export class TextDerivedRerankingStrategy {
      * Configure the reranking strategy
      */
     configure(config) {
-        if (config.imageToTextModel && typeof config.imageToTextModel === 'string') {
-            this.imageToTextModelName = config.imageToTextModel;
-            // Reset initialization to use new model
-            this.initialized = false;
-            this.imageToTextModel = null;
-        }
+        // Note: imageToTextModel configuration is no longer used
+        // since we delegate to file-processor's implementation
         if (config.crossEncoderModel && typeof config.crossEncoderModel === 'string') {
             this.crossEncoderReranker.configure({ modelName: config.crossEncoderModel });
         }
@@ -334,15 +272,10 @@ export class TextDerivedRerankingStrategy {
         return {
             description: 'Text-derived reranking that converts images to text descriptions then applies cross-encoder reranking',
             requiredModels: [
-                'Xenova/vit-gpt2-image-captioning', // Image-to-text model
+                'Xenova/vit-gpt2-image-captioning', // Image-to-text model (via file-processor)
                 'Xenova/ms-marco-MiniLM-L-6-v2' // Cross-encoder model
             ],
             configOptions: {
-                imageToTextModel: {
-                    type: 'string',
-                    description: 'Image-to-text model name for generating descriptions',
-                    default: 'Xenova/vit-gpt2-image-captioning'
-                },
                 crossEncoderModel: {
                     type: 'string',
                     description: 'Cross-encoder model name for text reranking',
@@ -360,16 +293,15 @@ export class TextDerivedRerankingStrategy {
      * Check if the strategy is ready to use
      */
     async isReady() {
-        await this.ensureInitialized();
         const crossEncoderReady = await this.crossEncoderReranker.isReady();
-        return this.isEnabled && this.imageToTextModel !== null && crossEncoderReady;
+        return this.isEnabled && crossEncoderReady;
     }
     /**
      * Get the current model names being used
      */
     getModelNames() {
         return {
-            imageToText: this.imageToTextModelName,
+            imageToText: 'Xenova/vit-gpt2-image-captioning', // Fixed model via file-processor
             crossEncoder: this.crossEncoderReranker.getModelName()
         };
     }
@@ -377,8 +309,6 @@ export class TextDerivedRerankingStrategy {
      * Clean up resources
      */
     async cleanup() {
-        this.initialized = false;
-        this.imageToTextModel = null;
         await this.crossEncoderReranker.cleanup();
     }
 }

package/dist/core/vector-index.d.ts CHANGED Viewed

@@ -30,7 +30,7 @@ export declare class VectorIndex {
      */
     loadIndex(): Promise<void>;
     /**
-     * Save index to file using JSON format (since IDBFS doesn't work in Node.js)
+     * Save index to binary format
      */
     saveIndex(): Promise<void>;
     /**

package/dist/core/vector-index.js CHANGED Viewed

@@ -2,10 +2,11 @@
  * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
  * Model-agnostic. No transformer or modality-specific logic.
  */
-import { readFileSync, writeFileSync, existsSync } from 'fs';
+import { existsSync } from 'fs';
 import { JSDOM } from 'jsdom';
 import { ErrorCategory, ErrorSeverity, safeExecute } from './error-handler.js';
 import { createMissingFileError, createDimensionMismatchError } from './actionable-error-messages.js';
+import { BinaryIndexFormat } from './binary-index-format.js';
 // Set up browser-like environment for hnswlib-wasm
 if (typeof window === 'undefined') {
     const dom = new JSDOM('<!DOCTYPE html><html><body></body></html>', {
@@ -153,66 +154,64 @@ export class VectorIndex {
             }
             // Create new HNSW index (third parameter is autoSaveFilename, but we'll handle persistence manually)
             this.index = new this.hnswlib.HierarchicalNSW('cosine', this.options.dimensions, '');
-            // Load from JSON format since IDBFS doesn't work in Node.js
-            const data = readFileSync(this.indexPath, 'utf-8');
-            const stored = JSON.parse(data);
-            // Check dimension compatibility and log details
-            if (stored.dimensions && stored.dimensions !== this.options.dimensions) {
+            // Load from binary format
+            const data = await BinaryIndexFormat.load(this.indexPath);
+            // Validate dimensions
+            if (data.dimensions !== this.options.dimensions) {
                 console.log(`⚠️  Dimension mismatch detected:`);
-                console.log(`   Stored dimensions: ${stored.dimensions}`);
+                console.log(`   Stored dimensions: ${data.dimensions}`);
                 console.log(`   Expected dimensions: ${this.options.dimensions}`);
-                console.log(`   Number of vectors: ${stored.vectors?.length || 0}`);
-                if (stored.vectors && stored.vectors.length > 0) {
-                    console.log(`   Actual vector length: ${stored.vectors[0].vector.length}`);
+                console.log(`   Number of vectors: ${data.vectors.length}`);
+                if (data.vectors.length > 0) {
+                    console.log(`   Actual vector length: ${data.vectors[0].vector.length}`);
                 }
-                throw createDimensionMismatchError(this.options.dimensions, stored.dimensions, 'vector index loading', { operationContext: 'VectorIndex.loadIndex' });
+                throw createDimensionMismatchError(this.options.dimensions, data.dimensions, 'vector index loading', { operationContext: 'VectorIndex.loadIndex' });
             }
             // Update options from stored data
-            this.options.maxElements = stored.maxElements || this.options.maxElements;
-            this.options.M = stored.M || this.options.M;
-            this.options.efConstruction = stored.efConstruction || this.options.efConstruction;
-            this.options.seed = stored.seed || this.options.seed;
-            // Recreate the index from stored data
-            this.index.initIndex(this.options.maxElements, this.options.M || 16, this.options.efConstruction || 200, this.options.seed || 100);
+            this.options.maxElements = data.maxElements;
+            this.options.M = data.M;
+            this.options.efConstruction = data.efConstruction;
+            this.options.seed = data.seed;
+            // Initialize HNSW index
+            this.index.initIndex(this.options.maxElements, this.options.M, this.options.efConstruction, this.options.seed);
             // Clear and repopulate vector storage
             this.vectorStorage.clear();
-            // Add all stored vectors back
-            for (const item of stored.vectors || []) {
-                const vector = new Float32Array(item.vector);
-                this.index.addPoint(vector, item.id, false);
-                this.vectorStorage.set(item.id, vector);
+            // Add all stored vectors to HNSW index
+            for (const item of data.vectors) {
+                this.index.addPoint(item.vector, item.id, false);
+                this.vectorStorage.set(item.id, item.vector);
             }
-            this.currentSize = stored.vectors?.length || 0;
-            console.log(`Loaded HNSW index with ${this.currentSize} vectors from ${this.indexPath}`);
+            this.currentSize = data.currentSize;
+            console.log(`✓ Loaded HNSW index with ${this.currentSize} vectors from ${this.indexPath}`);
         }
         catch (error) {
             throw new Error(`Failed to load index from ${this.indexPath}: ${error}`);
         }
     }
     /**
-     * Save index to file using JSON format (since IDBFS doesn't work in Node.js)
+     * Save index to binary format
      */
     async saveIndex() {
         if (!this.index) {
             throw new Error('Index not initialized');
         }
         try {
-            // Convert stored vectors to serializable format
+            // Collect all vectors from storage
             const vectors = Array.from(this.vectorStorage.entries()).map(([id, vector]) => ({
                 id,
-                vector: Array.from(vector)
+                vector
             }));
-            const stored = {
+            // Save to binary format
+            await BinaryIndexFormat.save(this.indexPath, {
                 dimensions: this.options.dimensions,
                 maxElements: this.options.maxElements,
                 M: this.options.M || 16,
                 efConstruction: this.options.efConstruction || 200,
                 seed: this.options.seed || 100,
                 currentSize: this.currentSize,
-                vectors: vectors
-            };
-            writeFileSync(this.indexPath, JSON.stringify(stored, null, 2));
-            console.log(`Saved HNSW index with ${this.currentSize} vectors to ${this.indexPath}`);
+                vectors
+            });
+            console.log(`✓ Saved HNSW index with ${this.currentSize} vectors to ${this.indexPath}`);
         }
         catch (error) {
             throw new Error(`Failed to save index to ${this.indexPath}: ${error}`);

package/dist/dom-polyfills.js CHANGED Viewed

@@ -30,11 +30,8 @@ if (typeof window === 'undefined') {
     if (typeof globalThis.navigator === 'undefined') {
         globalThis.navigator = dom.window.navigator;
     }
-    // Polyfill createImageBitmap if needed (for image processing)
-    if (typeof globalThis.createImageBitmap === 'undefined') {
-        globalThis.createImageBitmap = dom.window.createImageBitmap || (() => {
-            throw new Error('createImageBitmap not available in Node.js environment');
-        });
-    }
+    // Note: Do NOT polyfill createImageBitmap with a fake implementation
+    // RawImage.fromURL() will handle image loading correctly without it
+    // Setting a fake createImageBitmap that throws errors breaks image loading
 }
 //# sourceMappingURL=dom-polyfills.js.map

package/dist/factories/index.d.ts CHANGED Viewed

@@ -36,6 +36,8 @@
  * ```
  */
 export { TextSearchFactory, TextIngestionFactory, TextRAGFactory, TextFactoryHelpers } from './text-factory.js';
+export { PolymorphicSearchFactory } from './polymorphic-factory.js';
+export type { PolymorphicSearchOptions } from './polymorphic-factory.js';
 export type { TextSearchOptions, TextIngestionOptions, ContentSystemConfig } from './text-factory.js';
 export { TextSearchFactory as SearchFactory } from './text-factory.js';
 export { TextIngestionFactory as IngestionFactory } from './text-factory.js';

package/dist/factories/index.js CHANGED Viewed

@@ -37,6 +37,8 @@
  */
 // Main factory classes
 export { TextSearchFactory, TextIngestionFactory, TextRAGFactory, TextFactoryHelpers } from './text-factory.js';
+// Polymorphic factory for mode-aware search
+export { PolymorphicSearchFactory } from './polymorphic-factory.js';
 // Convenience re-exports for common patterns
 export { TextSearchFactory as SearchFactory } from './text-factory.js';
 export { TextIngestionFactory as IngestionFactory } from './text-factory.js';