rag-lite-ts 2.0.1 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -0
- package/dist/cli/indexer.js +21 -2
- package/dist/cli.js +2 -2
- package/dist/core/batch-processing-optimizer.js +6 -11
- package/dist/core/ingestion.js +13 -3
- package/dist/core/model-registry.js +4 -4
- package/dist/core/reranking-strategies.d.ts +1 -16
- package/dist/core/reranking-strategies.js +12 -82
- package/dist/dom-polyfills.js +3 -6
- package/dist/factories/text-factory.js +32 -18
- package/dist/file-processor.js +30 -102
- package/dist/indexer.js +5 -2
- package/dist/ingestion.js +18 -3
- package/dist/mcp-server.js +16 -9
- package/dist/multimodal/clip-embedder.js +11 -11
- package/package.json +1 -1
package/README.md
CHANGED

````diff
@@ -438,6 +438,33 @@ Now Claude can search your docs directly! Works with any MCP-compatible AI tool.
 </tr>
 </table>
 
+### 📁 Supported File Formats
+
+RAG-lite TS supports the following file formats with full processing implementations:
+
+**Text Mode:**
+- Markdown: `.md`, `.mdx`
+- Plain text: `.txt`
+- Documents: `.pdf`, `.docx`
+
+**Multimodal Mode** (includes all text formats plus):
+- Images: `.jpg`, `.jpeg`, `.png`, `.gif`, `.webp`, `.bmp`
+
+All formats work seamlessly with both single file and directory ingestion:
+
+```bash
+# Single file ingestion
+raglite ingest ./document.pdf
+raglite ingest ./readme.md
+raglite ingest ./notes.txt
+
+# Directory ingestion (processes all supported formats)
+raglite ingest ./docs/
+
+# Multimodal ingestion (includes images)
+raglite ingest ./mixed-content/ --mode multimodal
+```
+
 ## 🔧 How It Works
 
 RAG-lite TS follows a clean, efficient pipeline:
````
package/dist/cli/indexer.js
CHANGED

```diff
@@ -148,12 +148,31 @@ export async function runIngest(path, options = {}) {
     const pathType = stats.isDirectory() ? 'directory' : 'file';
     // Validate file type for single files
     if (stats.isFile()) {
-        const
+        const mode = options.mode || 'text';
+        // Only formats with actual processing implementations
+        const textExtensions = ['.md', '.txt', '.mdx', '.pdf', '.docx'];
+        const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'];
+        const validExtensions = mode === 'multimodal'
+            ? [...textExtensions, ...imageExtensions]
+            : textExtensions;
         const hasValidExtension = validExtensions.some(ext => path.toLowerCase().endsWith(ext));
         if (!hasValidExtension) {
             console.error(`Error: Unsupported file type: ${path}`);
             console.error('');
-
+            if (mode === 'multimodal') {
+                console.error('Supported file types in multimodal mode:');
+                console.error('  Text: .md, .txt, .mdx');
+                console.error('  Documents: .pdf, .docx');
+                console.error('  Images: .jpg, .jpeg, .png, .gif, .webp, .bmp');
+            }
+            else {
+                console.error('Supported file types in text mode:');
+                console.error('  Text: .md, .txt, .mdx');
+                console.error('  Documents: .pdf, .docx');
+                console.error('');
+                console.error('For image files, use --mode multimodal:');
+                console.error('  raglite ingest <path> --mode multimodal');
+            }
             console.error('');
             console.error('If you want to ingest multiple files, provide a directory path instead.');
             process.exit(EXIT_CODES.INVALID_ARGUMENTS);
```
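The mode-gated whitelist added above is self-contained enough to read in isolation. As a minimal sketch, here is the same rule as a pure function; the extension arrays mirror the diff exactly, while the function name and usage are illustrative, not part of the package's API:

```ts
// Mode-dependent extension whitelist, mirroring the CLI validation above.
const TEXT_EXTENSIONS = ['.md', '.txt', '.mdx', '.pdf', '.docx'];
const IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'];

// Hypothetical helper name; the CLI inlines this logic instead.
function isSupportedFile(path: string, mode: 'text' | 'multimodal' = 'text'): boolean {
  const validExtensions = mode === 'multimodal'
    ? [...TEXT_EXTENSIONS, ...IMAGE_EXTENSIONS]
    : TEXT_EXTENSIONS;
  // Case-insensitive suffix match, same as the hasValidExtension check
  return validExtensions.some(ext => path.toLowerCase().endsWith(ext));
}

console.log(isSupportedFile('photo.PNG'));               // false: images need multimodal mode
console.log(isSupportedFile('photo.PNG', 'multimodal')); // true
console.log(isSupportedFile('notes.txt'));               // true in either mode
```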
package/dist/cli.js
CHANGED

```diff
@@ -26,7 +26,7 @@ Commands:
   help                    Show this help message
 
 Examples:
-  raglite ingest ./docs/                    # Ingest all .md/.txt files in docs/
+  raglite ingest ./docs/                    # Ingest all .md/.txt/.docx/.pdf files in docs/
   raglite ingest ./readme.md                # Ingest single file
   raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2  # Use higher quality model
   raglite ingest ./docs/ --mode multimodal  # Enable multimodal processing
@@ -126,7 +126,7 @@ function validateArgs(command, args, options) {
         console.error('Usage: raglite ingest <path>');
         console.error('');
         console.error('Examples:');
-        console.error('  raglite ingest ./docs/                    # Ingest all .md/.txt files in docs/');
+        console.error('  raglite ingest ./docs/                    # Ingest all .md/.txt/.docx/.pdf files in docs/');
         console.error('  raglite ingest ./readme.md                # Ingest single file');
         console.error('  raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2  # Use higher quality model');
         console.error('  raglite ingest ./docs/ --mode multimodal  # Enable multimodal processing');
```
package/dist/core/batch-processing-optimizer.js
CHANGED

```diff
@@ -15,8 +15,8 @@ export const DEFAULT_BATCH_CONFIG = {
     textBatchSize: 16,
     imageBatchSize: 4, // Smaller for memory-intensive image processing
     maxConcurrentBatches: 2,
-    // Memory management (
-    memoryThresholdMB:
+    // Memory management (512MB threshold for multimodal processing)
+    memoryThresholdMB: 512,
     enableMemoryMonitoring: true,
     enableGarbageCollection: true,
     // Progress reporting every 5 batches
@@ -402,13 +402,8 @@ export class BatchProcessingOptimizer {
      */
     async preloadImageProcessingModels() {
         try {
-
-
-            const processor = await LazyMultimodalLoader.loadImageToTextProcessor();
-            this.resourcePool.set('imageToText', processor);
-            // Register with resource manager
-            this.resourceManager.registerImageProcessor(processor, 'image-to-text');
-            }
+            // Note: Image-to-text processor is loaded on-demand by file-processor.ts
+            // to avoid conflicts with different pipeline configurations
             if (!this.resourcePool.has('metadataExtractor')) {
                 console.log('Preloading image metadata extractor...');
                 const extractor = await LazyMultimodalLoader.loadImageMetadataExtractor();
@@ -519,7 +514,7 @@ export function createImageBatchProcessor() {
     return new BatchProcessingOptimizer({
         imageBatchSize: 2, // Very small batches for memory efficiency
         textBatchSize: 8,
-        memoryThresholdMB:
+        memoryThresholdMB: 512, // Higher threshold for memory-intensive image processing
         enableMemoryMonitoring: true,
         enableGarbageCollection: true,
         enableParallelProcessing: false, // Sequential for better memory control
@@ -534,7 +529,7 @@ export function createTextBatchProcessor() {
         textBatchSize: 32, // Larger batches for text
         imageBatchSize: 4,
         enableParallelProcessing: true, // Parallel processing for text
-        memoryThresholdMB:
+        memoryThresholdMB: 256, // Lower threshold sufficient for text processing
         progressReportInterval: 10
     });
 }
```
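The concrete values (512 MB for image work, 256 MB for text) replace literals that were previously cut off in this diff. The optimizer's monitoring loop itself is not shown here, so the following is only an assumption-level sketch of how such a threshold is typically enforced in Node.js, using the standard `process.memoryUsage()` API and the optional `--expose-gc` hook that an `enableGarbageCollection` option has to rely on:

```ts
// Hypothetical between-batch threshold check; not the package's actual code.
function heapUsedMB(): number {
  return process.memoryUsage().heapUsed / (1024 * 1024);
}

async function maybeCollect(memoryThresholdMB: number): Promise<void> {
  if (heapUsedMB() <= memoryThresholdMB) return;
  // global.gc exists only when Node is started with --expose-gc
  const gc = (globalThis as { gc?: () => void }).gc;
  if (gc) gc();
  // Yield to the event loop so released buffers can be reclaimed before the next batch
  await new Promise(resolve => setImmediate(resolve));
}

// Hypothetical usage between batches:
// await maybeCollect(512); // image batches
// await maybeCollect(256); // text batches
```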
package/dist/core/ingestion.js
CHANGED

```diff
@@ -290,7 +290,7 @@ export class IngestionPipeline {
             chunkSize: config.chunk_size,
             chunkOverlap: config.chunk_overlap
         };
-        const chunkingResult = await this.chunkDocumentsWithContentTypes(fileResult.documents, effectiveChunkConfig);
+        const chunkingResult = await this.chunkDocumentsWithContentTypes(fileResult.documents, effectiveChunkConfig, options.mode);
         if (chunkingResult.totalChunks === 0) {
             console.log('No chunks created from documents');
             return {
@@ -364,7 +364,7 @@
      * Chunk all documents and organize results with content-type awareness
      * Enhanced to handle different content types appropriately
      */
-    async chunkDocumentsWithContentTypes(documents, chunkConfig) {
+    async chunkDocumentsWithContentTypes(documents, chunkConfig, mode) {
         const documentChunks = [];
         const allChunks = [];
         let totalChunks = 0;
@@ -384,8 +384,18 @@
                 metadata: document.metadata
             }];
         }
+        else if (mode === 'multimodal') {
+            // In multimodal mode, don't chunk text - CLIP handles truncation at 77 tokens
+            // Chunking doesn't make sense because CLIP can't handle long text anyway
+            chunks = [{
+                    text: document.content,
+                    chunkIndex: 0,
+                    contentType: 'text',
+                    metadata: document.metadata
+                }];
+        }
         else {
-            // For text
+            // For text mode, use normal chunking
             const textChunks = await chunkDocument(document, chunkConfig);
             chunks = textChunks.map(chunk => ({
                 ...chunk,
```
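The new `else if (mode === 'multimodal')` branch encodes a simple trade-off: chunking only helps when the embedder can actually attend to the extra tokens, and CLIP truncates everything past 77 tokens, so multimodal ingestion keeps one chunk per document. A minimal sketch of that decision; the stub chunker below stands in for the package's `chunkDocument`, and the 512-character size is arbitrary:

```ts
// Sketch only: chunkDocument is stubbed; the real one lives in the package.
interface Chunk {
  text: string;
  chunkIndex: number;
  contentType: 'text';
}

function chunkDocument(content: string, chunkSize: number): Chunk[] {
  const chunks: Chunk[] = [];
  for (let i = 0; i * chunkSize < content.length; i++) {
    chunks.push({
      text: content.slice(i * chunkSize, (i + 1) * chunkSize),
      chunkIndex: i,
      contentType: 'text',
    });
  }
  return chunks;
}

function chunksFor(content: string, mode: 'text' | 'multimodal'): Chunk[] {
  if (mode === 'multimodal') {
    // One chunk per document: CLIP truncates at 77 tokens regardless,
    // so splitting the text further buys nothing in this mode
    return [{ text: content, chunkIndex: 0, contentType: 'text' }];
  }
  return chunkDocument(content, 512); // normal text-mode chunking
}
```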
package/dist/core/model-registry.js
CHANGED

```diff
@@ -69,7 +69,7 @@ export const SUPPORTED_MODELS = {
         supportsMetadata: true,
         supportsMultimodal: true, // True cross-modal search capabilities
         maxBatchSize: 8,
-        maxTextLength: 77, // CLIP's
+        maxTextLength: 77, // CLIP's token limit (tokenizer handles truncation)
         supportedImageFormats: ['jpg', 'jpeg', 'png', 'webp', 'gif']
     },
     requirements: {
@@ -92,7 +92,7 @@ export const SUPPORTED_MODELS = {
         supportsMetadata: true,
         supportsMultimodal: true, // True cross-modal search capabilities
         maxBatchSize: 4,
-        maxTextLength: 77, // CLIP's
+        maxTextLength: 77, // CLIP's token limit (tokenizer handles truncation)
         supportedImageFormats: ['jpg', 'jpeg', 'png', 'webp', 'gif']
     },
     requirements: {
@@ -194,9 +194,9 @@ export class ModelRegistry {
             suggestions.push('Use smaller batch sizes for optimal performance');
         }
         // Text length limitations
-        if (modelInfo.capabilities.maxTextLength && modelInfo.capabilities.maxTextLength <
+        if (modelInfo.capabilities.maxTextLength && modelInfo.capabilities.maxTextLength < 256) {
             warnings.push(`Model has limited text length: ${modelInfo.capabilities.maxTextLength} characters`);
-            suggestions.push('
+            suggestions.push('Long texts will be truncated by the tokenizer');
         }
         // Image format support
         if (modelInfo.capabilities.supportsImages && modelInfo.capabilities.supportedImageFormats) {
```
package/dist/core/reranking-strategies.d.ts
CHANGED

```diff
@@ -97,20 +97,10 @@ export declare class TextDerivedRerankingStrategy implements RerankingStrategy {
     readonly supportedContentTypes: string[];
     isEnabled: boolean;
     private crossEncoderReranker;
-    private imageToTextModel;
-    private imageToTextModelName;
-    private initialized;
     constructor(imageToTextModelName?: string, crossEncoderModelName?: string);
-    /**
-     * Initialize the image-to-text model if not already done
-     */
-    private ensureInitialized;
-    /**
-     * Ensure DOM polyfills are set up for transformers.js
-     */
-    private ensurePolyfills;
     /**
      * Generate text description for an image
+     * Uses the shared image-to-text functionality from file-processor
      */
     private generateImageDescription;
     /**
@@ -128,11 +118,6 @@ export declare class TextDerivedRerankingStrategy implements RerankingStrategy {
         description: string;
         requiredModels: string[];
         configOptions: {
-            imageToTextModel: {
-                type: string;
-                description: string;
-                default: string;
-            };
             crossEncoderModel: {
                 type: string;
                 description: string;
```
package/dist/core/reranking-strategies.js
CHANGED

```diff
@@ -174,69 +174,22 @@ export class TextDerivedRerankingStrategy {
     supportedContentTypes = ['text', 'image'];
     isEnabled = true;
     crossEncoderReranker;
-    imageToTextModel = null;
-    imageToTextModelName = 'Xenova/vit-gpt2-image-captioning';
-    initialized = false;
     constructor(imageToTextModelName, crossEncoderModelName) {
-
-
-        }
+        // Note: imageToTextModelName parameter is kept for backward compatibility
+        // but is no longer used since we delegate to file-processor's implementation
         // Create the underlying cross-encoder strategy
         this.crossEncoderReranker = new CrossEncoderRerankingStrategy(crossEncoderModelName);
     }
-    /**
-     * Initialize the image-to-text model if not already done
-     */
-    async ensureInitialized() {
-        if (!this.initialized) {
-            try {
-                console.log(`Loading image-to-text model: ${this.imageToTextModelName}`);
-                // Set up polyfills for transformers.js
-                this.ensurePolyfills();
-                const { pipeline } = await import('@huggingface/transformers');
-                this.imageToTextModel = await pipeline('image-to-text', this.imageToTextModelName);
-                this.initialized = true;
-                console.log(`Image-to-text model loaded successfully: ${this.imageToTextModelName}`);
-            }
-            catch (error) {
-                console.warn(`Image-to-text model initialization failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
-                this.isEnabled = false;
-            }
-        }
-    }
-    /**
-     * Ensure DOM polyfills are set up for transformers.js
-     */
-    ensurePolyfills() {
-        if (typeof window === 'undefined' && typeof globalThis !== 'undefined') {
-            if (typeof globalThis.self === 'undefined') {
-                globalThis.self = globalThis;
-            }
-            if (typeof global.self === 'undefined') {
-                global.self = global;
-            }
-        }
-    }
     /**
      * Generate text description for an image
+     * Uses the shared image-to-text functionality from file-processor
      */
     async generateImageDescription(imagePath) {
-        await this.ensureInitialized();
-        if (!this.imageToTextModel) {
-            throw new Error('Image-to-text model not loaded');
-        }
         try {
-
-
-
-
-            }
-            else if (result && typeof result === 'object') {
-                return result.generated_text || result.text || String(result);
-            }
-            else {
-                return String(result);
-            }
+            // Use the file-processor's image description function which has proven to work reliably
+            const { generateImageDescriptionForFile } = await import('../file-processor.js');
+            const result = await generateImageDescriptionForFile(imagePath);
+            return result.description;
         }
         catch (error) {
             console.warn(`Failed to generate description for image ${imagePath}: ${error instanceof Error ? error.message : 'Unknown error'}`);
@@ -249,22 +202,11 @@ export class TextDerivedRerankingStrategy {
      * Rerank search results using text-derived approach
      */
     rerank = async (query, results, contentType) => {
-        // If strategy is disabled, return results unchanged
-        if (!this.isEnabled) {
-            return results;
-        }
         // Validate content type
         if (contentType && !this.supportedContentTypes.includes(contentType)) {
             throw new Error(`Text-derived strategy does not support content type '${contentType}'. ` +
                 `Supported types: ${this.supportedContentTypes.join(', ')}`);
         }
-        // Ensure models are initialized
-        await this.ensureInitialized();
-        // If initialization failed, return results unchanged
-        if (!this.isEnabled) {
-            console.warn('Text-derived reranker not enabled, returning results unchanged');
-            return results;
-        }
         try {
             // Step 1: Convert images to text descriptions
             const processedResults = await Promise.all(results.map(async (result) => {
@@ -314,12 +256,8 @@ export class TextDerivedRerankingStrategy {
      * Configure the reranking strategy
      */
     configure(config) {
-
-
-        // Reset initialization to use new model
-        this.initialized = false;
-        this.imageToTextModel = null;
-        }
+        // Note: imageToTextModel configuration is no longer used
+        // since we delegate to file-processor's implementation
         if (config.crossEncoderModel && typeof config.crossEncoderModel === 'string') {
             this.crossEncoderReranker.configure({ modelName: config.crossEncoderModel });
         }
@@ -334,15 +272,10 @@ export class TextDerivedRerankingStrategy {
         return {
             description: 'Text-derived reranking that converts images to text descriptions then applies cross-encoder reranking',
             requiredModels: [
-                'Xenova/vit-gpt2-image-captioning', // Image-to-text model
+                'Xenova/vit-gpt2-image-captioning', // Image-to-text model (via file-processor)
                 'Xenova/ms-marco-MiniLM-L-6-v2' // Cross-encoder model
             ],
             configOptions: {
-                imageToTextModel: {
-                    type: 'string',
-                    description: 'Image-to-text model name for generating descriptions',
-                    default: 'Xenova/vit-gpt2-image-captioning'
-                },
                 crossEncoderModel: {
                     type: 'string',
                     description: 'Cross-encoder model name for text reranking',
@@ -360,16 +293,15 @@ export class TextDerivedRerankingStrategy {
      * Check if the strategy is ready to use
      */
     async isReady() {
-        await this.ensureInitialized();
         const crossEncoderReady = await this.crossEncoderReranker.isReady();
-        return this.isEnabled &&
+        return this.isEnabled && crossEncoderReady;
     }
     /**
      * Get the current model names being used
      */
     getModelNames() {
         return {
-            imageToText:
+            imageToText: 'Xenova/vit-gpt2-image-captioning', // Fixed model via file-processor
             crossEncoder: this.crossEncoderReranker.getModelName()
         };
     }
@@ -377,8 +309,6 @@ export class TextDerivedRerankingStrategy {
      * Clean up resources
      */
     async cleanup() {
-        this.initialized = false;
-        this.imageToTextModel = null;
         await this.crossEncoderReranker.cleanup();
     }
 }
```
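The net effect of these hunks is that `TextDerivedRerankingStrategy` no longer owns a captioning pipeline, polyfills, or initialization state; it delegates to the single shared implementation in `file-processor.js`. The dynamic `import()` does real work here: it defers loading the heavy module until an image actually needs a caption and avoids a load-time import cycle between the two files. The core of the delegation, isolated below; the import path and function name are exactly those in the diff:

```ts
// Lazy delegation to the shared captioning implementation.
async function describeImage(imagePath: string): Promise<string> {
  // Dynamic import: file-processor.js is evaluated only on first use, so
  // reranking-strategies.js and file-processor.js never form a static cycle.
  const { generateImageDescriptionForFile } = await import('../file-processor.js');
  const result = await generateImageDescriptionForFile(imagePath);
  return result.description;
}
```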
package/dist/dom-polyfills.js
CHANGED

```diff
@@ -30,11 +30,8 @@ if (typeof window === 'undefined') {
     if (typeof globalThis.navigator === 'undefined') {
         globalThis.navigator = dom.window.navigator;
     }
-    //
-
-
-        throw new Error('createImageBitmap not available in Node.js environment');
-    });
-    }
+    // Note: Do NOT polyfill createImageBitmap with a fake implementation
+    // RawImage.fromURL() will handle image loading correctly without it
+    // Setting a fake createImageBitmap that throws errors breaks image loading
 }
 //# sourceMappingURL=dom-polyfills.js.map
```
package/dist/factories/text-factory.js
CHANGED

```diff
@@ -421,18 +421,35 @@ export class TextIngestionFactory {
             console.log(`📁 Creating index directory: ${indexDir}`);
             mkdirSync(indexDir, { recursive: true });
         }
-        // Step 1:
-        const
+        // Step 1: Determine effective mode and select appropriate default model
+        const effectiveMode = options.mode || 'text';
+        // Step 1.5: Select model based on mode if not explicitly provided
+        let effectiveModel;
+        if (options.embeddingModel) {
+            // Use explicitly provided model
+            effectiveModel = options.embeddingModel;
+        }
+        else {
+            // Select default model based on mode
+            if (effectiveMode === 'multimodal') {
+                const { DEFAULT_MODELS } = await import('../core/model-registry.js');
+                effectiveModel = DEFAULT_MODELS['clip'];
+                console.log(`📊 No model specified for multimodal mode, using default: ${effectiveModel}`);
+            }
+            else {
+                effectiveModel = config.embedding_model;
+            }
+        }
+        // Step 2: Get model-specific defaults and merge with options
+        const modelDefaults = getModelDefaults(effectiveModel);
         const effectiveBatchSize = options.batchSize ?? modelDefaults.batch_size;
         const effectiveChunkSize = options.chunkSize ?? modelDefaults.chunk_size;
         const effectiveChunkOverlap = options.chunkOverlap ?? modelDefaults.chunk_overlap;
-        // Step
-        const effectiveMode = options.mode || 'text';
-        const effectiveModel = options.embeddingModel || config.embedding_model;
+        // Step 3: Validate mode-model compatibility at creation time
         console.log('🔍 Validating mode-model compatibility...');
         validateModeModelCompatibilityOrThrow(effectiveMode, effectiveModel);
         console.log('✓ Mode-model compatibility validated');
-        // Step
+        // Step 4: Initialize embedding function based on mode
         let embedFn;
         if (effectiveMode === 'multimodal') {
             console.log('📊 Loading CLIP embedding model for multimodal mode...');
@@ -463,10 +480,10 @@ export class TextIngestionFactory {
         await initializeSchema(db);
         console.log('✓ Database connection established');
         // Step 3.1: Handle mode storage during ingestion
-        await this.handleModeStorage(db, options, modelDefaults);
-        // Step
+        await this.handleModeStorage(db, options, modelDefaults, effectiveModel);
+        // Step 5: Initialize index manager
         console.log('📇 Initializing vector index...');
-        const indexManager = new IndexManager(indexPath, dbPath, modelDefaults.dimensions,
+        const indexManager = new IndexManager(indexPath, dbPath, modelDefaults.dimensions, effectiveModel);
         // Check if we need to force recreation due to model change
         let forceRecreate = false;
         if (options.forceRebuild && existsSync(indexPath) && existsSync(dbPath)) {
@@ -477,9 +494,8 @@ export class TextIngestionFactory {
                 const tempDb = await openDatabase(dbPath);
                 try {
                     const storedModel = await getStoredModelInfo(tempDb);
-
-
-                        console.log(`🔄 Model change detected: ${storedModel.modelName} → ${currentModel}`);
+                    if (storedModel && storedModel.modelName !== effectiveModel) {
+                        console.log(`🔄 Model change detected: ${storedModel.modelName} → ${effectiveModel}`);
                         console.log(`🔄 Dimensions change: ${storedModel.dimensions} → ${modelDefaults.dimensions}`);
                     }
                     else if (storedModel && storedModel.dimensions !== modelDefaults.dimensions) {
@@ -503,9 +519,8 @@ export class TextIngestionFactory {
                 // Update stored model info when rebuilding or creating new index
                 if (options.forceRebuild || forceRecreate) {
                     const { setStoredModelInfo } = await import('../core/db.js');
-
-
-                    console.log(`✓ Updated stored model info: ${currentModel} (${modelDefaults.dimensions} dimensions)`);
+                    await setStoredModelInfo(db, effectiveModel, modelDefaults.dimensions);
+                    console.log(`✓ Updated stored model info: ${effectiveModel} (${modelDefaults.dimensions} dimensions)`);
                 }
             }
             else {
@@ -555,11 +570,10 @@ export class TextIngestionFactory {
      * Creates or validates system info based on the provided mode and options
      * @private
      */
-    static async handleModeStorage(db, options, modelDefaults) {
+    static async handleModeStorage(db, options, modelDefaults, effectiveModel) {
         const { getSystemInfo, setSystemInfo } = await import('../core/db.js');
-        // Determine the effective mode and
+        // Determine the effective mode and reranking strategy
         const effectiveMode = options.mode || 'text';
-        const effectiveModel = options.embeddingModel || config.embedding_model;
         const effectiveRerankingStrategy = options.rerankingStrategy || 'cross-encoder';
         // Determine model type based on model name
         let modelType;
```
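The factory now picks a default embedding model from the mode instead of always falling back to the config. The precedence is: explicit option, then a mode-specific default, then the configured value. A sketch of that rule; the `DEFAULT_MODELS` shape and model names are assumptions for illustration, since the diff only shows the `clip` key being read:

```ts
type Mode = 'text' | 'multimodal';

// Assumed shape; the diff only shows DEFAULT_MODELS['clip'] being accessed
const DEFAULT_MODELS: Record<string, string> = {
  clip: 'Xenova/clip-vit-base-patch32', // hypothetical default CLIP checkpoint
};

function selectModel(mode: Mode, explicitModel?: string, configModel = 'config-model'): string {
  if (explicitModel) {
    return explicitModel; // an explicit --model always wins
  }
  if (mode === 'multimodal') {
    return DEFAULT_MODELS['clip']; // CLIP by default for multimodal
  }
  return configModel; // text mode keeps the configured model
}

console.log(selectModel('multimodal'));                       // CLIP default
console.log(selectModel('text'));                             // config-model
console.log(selectModel('text', 'Xenova/all-mpnet-base-v2')); // explicit override
```

This ordering is what lets `raglite ingest ./docs/ --mode multimodal` work without the user also having to name a CLIP model on the command line.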
package/dist/file-processor.js
CHANGED

```diff
@@ -346,24 +346,35 @@ function extractTitle(content, filePath) {
  * Cache for image-to-text pipeline to avoid reloading
  */
 let imageToTextPipeline = null;
+let imageToTextPipelinePromise = null;
 /**
- * Initialize the image-to-text pipeline
+ * Initialize the image-to-text pipeline with proper async locking
  */
 async function initializeImageToTextPipeline(modelName = 'Xenova/vit-gpt2-image-captioning') {
+    // Return cached pipeline if available
     if (imageToTextPipeline) {
         return imageToTextPipeline;
     }
-
-
-
-        imageToTextPipeline = await pipeline('image-to-text', modelName);
-        console.log(`Successfully loaded image-to-text model: ${modelName}`);
-        return imageToTextPipeline;
-    }
-    catch (error) {
-        console.error(`Failed to load image-to-text model ${modelName}:`, error);
-        throw new Error(`Failed to initialize image-to-text pipeline: ${error instanceof Error ? error.message : String(error)}`);
+    // If pipeline is currently loading, wait for it
+    if (imageToTextPipelinePromise) {
+        return imageToTextPipelinePromise;
     }
+    // Start loading pipeline
+    imageToTextPipelinePromise = (async () => {
+        try {
+            const { pipeline } = await import('@huggingface/transformers');
+            console.log(`Loading image-to-text model: ${modelName}`);
+            imageToTextPipeline = await pipeline('image-to-text', modelName);
+            console.log(`Successfully loaded image-to-text model: ${modelName}`);
+            return imageToTextPipeline;
+        }
+        catch (error) {
+            console.error(`Failed to load image-to-text model ${modelName}:`, error);
+            imageToTextPipelinePromise = null; // Reset on error so it can be retried
+            throw new Error(`Failed to initialize image-to-text pipeline: ${error instanceof Error ? error.message : String(error)}`);
+        }
+    })();
+    return imageToTextPipelinePromise;
 }
 /**
  * Parse PNG image dimensions from file buffer
@@ -545,8 +556,11 @@ async function extractImageMetadata(imagePath) {
 async function generateImageDescription(imagePath, options = DEFAULT_IMAGE_TO_TEXT_OPTIONS) {
     try {
         const pipeline = await initializeImageToTextPipeline(options.model);
-        //
-        const
+        // Load image using RawImage.fromURL which works with local file paths
+        const { RawImage } = await import('@huggingface/transformers');
+        const image = await RawImage.fromURL(imagePath);
+        // Generate description with loaded image
+        const result = await pipeline(image, {
             max_length: options.maxLength || 50,
             num_beams: 4,
             early_stopping: true
@@ -597,93 +611,6 @@ async function generateImageDescriptionsBatch(imagePaths, options = DEFAULT_IMAGE_TO_TEXT_OPTIONS) {
     }
     return results;
 }
-/**
- * Generate text descriptions for multiple images using optimized batch processing
- * Uses BatchProcessingOptimizer for memory-efficient processing of large image collections
- */
-async function generateImageDescriptionsBatchOptimized(imagePaths, options = DEFAULT_IMAGE_TO_TEXT_OPTIONS) {
-    // For small batches, use the existing implementation
-    if (imagePaths.length <= 10) {
-        return generateImageDescriptionsBatch(imagePaths, options);
-    }
-    try {
-        // Import batch processing optimizer
-        const { createImageBatchProcessor } = await import('./core/batch-processing-optimizer.js');
-        const batchProcessor = createImageBatchProcessor();
-        // Convert image paths to batch items
-        const batchItems = imagePaths.map(path => ({
-            content: path,
-            contentType: 'image',
-            metadata: { originalPath: path }
-        }));
-        // Create image description function
-        const imageDescriptionFunction = async (item) => {
-            try {
-                const result = await generateImageDescription(item.content, options);
-                return {
-                    embedding_id: `img_desc_${Date.now()}_${Math.random()}`,
-                    vector: new Float32Array([0]), // Placeholder vector
-                    contentType: 'image',
-                    metadata: {
-                        path: item.content,
-                        description: result.description,
-                        confidence: result.confidence,
-                        model: result.model
-                    }
-                };
-            }
-            catch (error) {
-                throw new Error(`Failed to generate description for ${item.content}: ${error instanceof Error ? error.message : String(error)}`);
-            }
-        };
-        // Process with optimization and progress reporting
-        const batchResult = await batchProcessor.processBatch(batchItems, imageDescriptionFunction, (stats) => {
-            console.log(`Image description progress: ${stats.processedItems}/${stats.totalItems} (${Math.round((stats.processedItems / stats.totalItems) * 100)}%)`);
-            console.log(`  Memory usage: ${stats.memoryUsageMB}MB (peak: ${stats.peakMemoryUsageMB}MB)`);
-            if (stats.failedItems > 0) {
-                console.log(`  Failed items: ${stats.failedItems}`);
-            }
-        });
-        // Log final statistics
-        console.log(`✓ Image description generation complete:`);
-        console.log(`  Processed: ${batchResult.stats.processedItems}/${batchResult.stats.totalItems}`);
-        console.log(`  Failed: ${batchResult.stats.failedItems}`);
-        console.log(`  Processing time: ${Math.round(batchResult.stats.processingTimeMs / 1000)}s`);
-        console.log(`  Rate: ${Math.round(batchResult.stats.itemsPerSecond)} images/sec`);
-        console.log(`  Peak memory usage: ${batchResult.stats.peakMemoryUsageMB}MB`);
-        if (batchResult.stats.retryCount > 0) {
-            console.log(`  Retries: ${batchResult.stats.retryCount}`);
-        }
-        // Convert results back to expected format
-        const results = [];
-        // Add successful results
-        for (const result of batchResult.results) {
-            if (result.metadata?.description) {
-                results.push({
-                    path: result.metadata.path,
-                    result: {
-                        description: result.metadata.description,
-                        confidence: result.metadata.confidence,
-                        model: result.metadata.model
-                    }
-                });
-            }
-        }
-        // Add failed results
-        for (const error of batchResult.errors) {
-            results.push({
-                path: error.item.content,
-                error: error.error
-            });
-        }
-        return results;
-    }
-    catch (error) {
-        console.warn(`Optimized batch processing failed, falling back to standard batch processing: ${error instanceof Error ? error.message : String(error)}`);
-        // Fall back to existing implementation
-        return generateImageDescriptionsBatch(imagePaths, options);
-    }
-}
 /**
  * Process image file to extract text description and metadata
  */
@@ -834,8 +761,8 @@ export async function processFiles(filePaths, pathManager, imageToTextOptions) {
     if (imageFiles.length > 0) {
         console.log(`Processing ${imageFiles.length} image files with optimized batch processing`);
         try {
-            // Use
-            const batchResults = await
+            // Use batch processing for image descriptions
+            const batchResults = await generateImageDescriptionsBatch(imageFiles, imageToTextOptions);
             // Convert batch results to documents with metadata extraction
             for (const batchResult of batchResults) {
                 try {
@@ -961,6 +888,7 @@ export async function cleanupImageProcessingResources() {
         await imageToTextPipeline.dispose();
     }
     imageToTextPipeline = null;
+    imageToTextPipelinePromise = null;
     console.log('Image-to-text pipeline cleaned up');
 }
 catch (error) {
```
package/dist/indexer.js
CHANGED

```diff
@@ -16,11 +16,14 @@ async function main() {
     console.error('  <path>    File or directory path to ingest (.md and .txt files)');
     console.error('');
     console.error('Examples:');
-    console.error('  node indexer.js ./docs/          # Ingest all .md/.txt files in docs/');
+    console.error('  node indexer.js ./docs/          # Ingest all .md/.txt/.pdf/.docs files in docs/');
     console.error('  node indexer.js ./readme.md      # Ingest single file');
     console.error('  node indexer.js ../project/docs/ # Ingest from parent directory');
     console.error('');
-    console.error('Supported file types:
+    console.error('Supported file types:');
+    console.error('  Text: .md, .txt, .mdx');
+    console.error('  Documents: .pdf, .docx');
+    console.error('  Images (multimodal mode): .jpg, .jpeg, .png, .gif, .webp, .bmp');
     console.error('');
     console.error('After ingestion, use: node search.js "your query"');
     process.exit(EXIT_CODES.INVALID_ARGUMENTS);
```
package/dist/ingestion.js
CHANGED

```diff
@@ -64,7 +64,12 @@ export class IngestionPipeline {
         if (!this.corePipeline) {
             throw new Error('IngestionPipeline failed to initialize');
         }
-
+        // Merge mode from constructor options with runtime options
+        const mergedOptions = {
+            ...options,
+            mode: options?.mode || this.options.mode
+        };
+        return this.corePipeline.ingestFile(filePath, mergedOptions);
     }
     /**
      * Ingest all documents in a directory
@@ -74,7 +79,12 @@
         if (!this.corePipeline) {
             throw new Error('IngestionPipeline failed to initialize');
         }
-
+        // Merge mode from constructor options with runtime options
+        const mergedOptions = {
+            ...options,
+            mode: options?.mode || this.options.mode
+        };
+        return this.corePipeline.ingestDirectory(directoryPath, mergedOptions);
     }
     /**
      * Ingest content from memory buffer
@@ -95,7 +105,12 @@
         if (!this.corePipeline) {
             throw new Error('IngestionPipeline failed to initialize');
         }
-
+        // Merge mode from constructor options with runtime options
+        const mergedOptions = {
+            ...options,
+            mode: options?.mode || this.options.mode
+        };
+        return this.corePipeline.ingestFromMemory(content, metadata, mergedOptions);
     }
     /**
      * Clean up resources
```
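All three wrappers apply the same precedence: a `mode` passed at call time overrides the one fixed in the constructor. The rule in isolation, with illustrative names:

```ts
type Mode = 'text' | 'multimodal';
interface IngestOptions { mode?: Mode }

class PipelineWrapper {
  constructor(private readonly options: IngestOptions = {}) {}

  // Same merge as the diff: per-call mode wins, constructor mode is the fallback
  mergeOptions(options?: IngestOptions): IngestOptions {
    return { ...options, mode: options?.mode || this.options.mode };
  }
}

const pipeline = new PipelineWrapper({ mode: 'multimodal' });
console.log(pipeline.mergeOptions());                 // { mode: 'multimodal' }
console.log(pipeline.mergeOptions({ mode: 'text' })); // { mode: 'text' }
```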
package/dist/mcp-server.js
CHANGED

```diff
@@ -501,16 +501,23 @@ class RagLiteMCPServer {
         catch (error) {
             throw new Error(`Cannot access path: ${args.path}. Check permissions.`);
         }
-        // Validate
+        // Validate mode parameter
+        const mode = args.mode || 'text';
+        // Validate file type for single files (only formats with actual processing implementations)
         if (stats.isFile()) {
-            const
+            const textExtensions = ['.md', '.txt', '.mdx', '.pdf', '.docx'];
+            const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'];
+            const validExtensions = mode === 'multimodal'
+                ? [...textExtensions, ...imageExtensions]
+                : textExtensions;
             const hasValidExtension = validExtensions.some(ext => args.path.toLowerCase().endsWith(ext));
             if (!hasValidExtension) {
-
+                const supportedTypes = mode === 'multimodal'
+                    ? '.md, .txt, .mdx, .pdf, .docx, .jpg, .jpeg, .png, .gif, .webp, .bmp'
+                    : '.md, .txt, .mdx, .pdf, .docx';
+                throw new Error(`Unsupported file type: ${args.path}. Supported types: ${supportedTypes}`);
             }
         }
-        // Validate mode parameter
-        const mode = args.mode || 'text';
         if (!['text', 'multimodal'].includes(mode)) {
             throw new Error(`Invalid mode: ${mode}. Supported modes: text, multimodal`);
         }
@@ -585,8 +592,8 @@ class RagLiteMCPServer {
                 chunks_per_second: result.processingTimeMs > 0 ?
                     Math.round(result.chunksCreated / (result.processingTimeMs / 1000) * 100) / 100 : 0,
                 supported_file_types: mode === 'multimodal'
-                    ? ['md', 'txt', 'jpg', 'jpeg', 'png', 'gif', 'webp']
-                    : ['md', 'txt'],
+                    ? ['md', 'txt', 'mdx', 'pdf', 'docx', 'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp']
+                    : ['md', 'txt', 'mdx', 'pdf', 'docx'],
                 success: true
             };
             return {
@@ -1132,7 +1139,7 @@ class RagLiteMCPServer {
                 text_search: true,
                 image_search: false,
                 multimodal_reranking: false,
-                supported_file_types: ['md', 'txt']
+                supported_file_types: ['md', 'txt', 'mdx', 'pdf', 'docx']
             };
         }
         else if (systemInfo.mode === 'multimodal') {
@@ -1140,7 +1147,7 @@ class RagLiteMCPServer {
                 text_search: true,
                 image_search: true,
                 multimodal_reranking: true,
-                supported_file_types: ['md', 'txt', 'jpg', 'png', 'gif', 'webp']
+                supported_file_types: ['md', 'txt', 'mdx', 'pdf', 'docx', 'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp']
             };
         }
     }
```
package/dist/multimodal/clip-embedder.js
CHANGED

```diff
@@ -339,15 +339,13 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
             throw new Error('CLIP text model or tokenizer not initialized');
         }
         try {
-            // Validate and truncate text if necessary (CLIP has a 77 token limit)
-            this.validateTextLength(text);
-            const finalProcessedText = this.truncateText(processedText);
             // Use the validated CLIPTextModelWithProjection approach (no pixel_values errors)
             // Tokenize text with CLIP's requirements
-
+            // The tokenizer handles truncation at 77 TOKENS (not characters)
+            const tokens = await this.tokenizer(processedText, {
                 padding: true,
                 truncation: true,
-                max_length: 77, // CLIP's text sequence length limit
+                max_length: 77, // CLIP's text sequence length limit (77 tokens)
                 return_tensors: 'pt'
             });
             // Log token information for debugging (only in development)
@@ -355,7 +353,7 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
                 const tokenIds = tokens.input_ids?.data || [];
                 const actualTokenCount = Array.from(tokenIds).filter((id) => id !== 0).length;
                 if (actualTokenCount >= 77) {
-                    console.warn(`Text truncated: "${
+                    console.warn(`Text truncated by tokenizer: "${processedText.substring(0, 50)}..." (truncated to 77 tokens)`);
                 }
             }
             // Generate text embedding using CLIPTextModelWithProjection
@@ -389,15 +387,15 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
                 console.warn(`Warning: Embedding normalization may be imprecise (magnitude: ${magnitudeAfterNorm.toFixed(6)})`);
             }
             // Generate unique embedding ID
-            const embeddingId = this.generateEmbeddingId(
+            const embeddingId = this.generateEmbeddingId(processedText, 'text');
             return {
                 embedding_id: embeddingId,
                 vector: embedding,
                 contentType: 'text',
                 metadata: {
                     originalText: text,
-                    processedText:
-                    textLength:
+                    processedText: processedText,
+                    textLength: processedText.length,
                     embeddingMagnitudeBeforeNorm: magnitudeBeforeNorm,
                     embeddingMagnitudeAfterNorm: magnitudeAfterNorm,
                     normalized: true,
@@ -682,8 +680,9 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
             const { createTextBatchProcessor } = await import('../core/batch-processing-optimizer.js');
             const batchProcessor = createTextBatchProcessor();
             // Convert to EmbeddingBatchItem format
+            // Let tokenizer handle truncation at 77 tokens (not characters)
             const batchItems = textItems.map(item => ({
-                content:
+                content: item.content.trim(),
                 contentType: item.contentType,
                 metadata: item.metadata
             }));
@@ -773,7 +772,8 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
      */
     async processBatchText(textItems) {
         // Prepare texts for batch processing
-
+        // Let tokenizer handle truncation at 77 tokens (not characters)
+        const texts = textItems.map(item => item.content.trim());
         // Tokenize all texts in batch
         const tokensBatch = await Promise.all(texts.map(text => this.tokenizer(text, {
             padding: true,
```
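The substantive fix in this file is moving truncation out of character-counting helpers (`validateTextLength`, `truncateText`) and into the tokenizer, where the 77-token budget actually lives: 77 tokens is not 77 characters, and only the tokenizer knows where the boundary falls. A minimal sketch with transformers.js, mirroring the tokenizer call in the diff; the tokenizer checkpoint name is an assumption for illustration, and any CLIP tokenizer behaves the same way:

```ts
import { AutoTokenizer } from '@huggingface/transformers';

// Hypothetical CLIP tokenizer checkpoint for illustration
const tokenizer = await AutoTokenizer.from_pretrained('Xenova/clip-vit-base-patch32');

// 77 is a token budget, not a character budget: a long string of common words
// may fit, while a short string of rare words may not. Let the tokenizer decide.
const tokens = await tokenizer('a photo of a very fluffy cat sitting on a windowsill', {
  padding: true,
  truncation: true,
  max_length: 77,
});

console.log(tokens.input_ids.dims); // [1, n] with n capped at 77
```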
package/package.json
CHANGED