npm - rag-lite-ts - Versions diffs - 2.0.5 → 2.1.1 - Mend

rag-lite-ts 2.0.5 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

package/README.md +815 -808
package/dist/cli/indexer.js +3 -39
package/dist/cli/search.d.ts +1 -1
package/dist/cli/search.js +123 -19
package/dist/cli.js +77 -94
package/dist/core/binary-index-format.d.ts +28 -2
package/dist/core/binary-index-format.js +196 -27
package/dist/core/db.js +173 -173
package/dist/core/ingestion.d.ts +5 -1
package/dist/core/ingestion.js +123 -18
package/dist/core/lazy-dependency-loader.d.ts +3 -8
package/dist/core/lazy-dependency-loader.js +11 -29
package/dist/core/mode-detection-service.js +1 -1
package/dist/core/reranking-config.d.ts +1 -1
package/dist/core/reranking-config.js +7 -16
package/dist/core/reranking-factory.js +3 -184
package/dist/core/search.d.ts +10 -0
package/dist/core/search.js +35 -11
package/dist/core/types.d.ts +1 -1
package/dist/core/vector-index.d.ts +4 -0
package/dist/core/vector-index.js +6 -0
package/dist/factories/ingestion-factory.js +3 -1
package/dist/file-processor.d.ts +2 -0
package/dist/file-processor.js +20 -0
package/dist/index-manager.d.ts +17 -1
package/dist/index-manager.js +148 -7
package/dist/mcp-server.js +127 -105
package/dist/multimodal/clip-embedder.js +6 -2
package/package.json +1 -1

package/dist/core/ingestion.js CHANGED Viewed

@@ -201,7 +201,23 @@ export class IngestionPipeline {
                 try {
                     // Convert MIME type to simple content type for embedding function
                     const contentTypeForEmbedding = this.getContentTypeForEmbedding(document.metadata?.contentType);
-                    const embedding = await this.embedFn(chunk.text, contentTypeForEmbedding);
+                    // For images, use the image path from metadata instead of text description
+                    let contentForEmbedding = chunk.text;
+                    if (contentTypeForEmbedding === 'image' && document.metadata) {
+                        // Try to get image path from metadata (contentPath, originalPath, or source)
+                        // contentPath is where the image is stored (from contentResult)
+                        const imagePath = document.metadata.contentPath ||
+                            document.metadata.originalPath ||
+                            document.metadata.source;
+                        if (imagePath) {
+                            contentForEmbedding = imagePath;
+                        }
+                        else {
+                            // Fallback: try to extract path from source if available
+                            console.warn(`Image chunk ${i + 1} missing image path in metadata, using text content as fallback`);
+                        }
+                    }
+                    const embedding = await this.embedFn(contentForEmbedding, contentTypeForEmbedding);
                     // Enhance embedding result with content type metadata
                     if (!embedding.contentType) {
                         embedding.contentType = contentTypeForEmbedding;
@@ -271,21 +287,30 @@ export class IngestionPipeline {
         try {
             // Phase 1: File Discovery and Processing with Content-Type Detection
             console.log('\n--- Phase 1: File Discovery and Processing ---');
-            const fileResult = await discoverAndProcessFiles(path, options.fileOptions, this.pathManager);
-            if (fileResult.documents.length === 0) {
+            const mode = options.mode || 'text';
+            const fileOptions = {
+                recursive: true,
+                maxFileSize: 10 * 1024 * 1024, // 10MB
+                ...options.fileOptions,
+                mode
+            };
+            const fileResult = await discoverAndProcessFiles(path, fileOptions, this.pathManager);
+            // Additional filtering as fallback (should be minimal with mode-aware discovery)
+            const filteredResult = this.filterDocumentsByMode(fileResult, mode);
+            if (filteredResult.documents.length === 0) {
                 console.log('No documents found to process');
                 return {
                     documentsProcessed: 0,
                     chunksCreated: 0,
                     embeddingsGenerated: 0,
-                    documentErrors: fileResult.processingResult.errors.length,
+                    documentErrors: filteredResult.processingResult.errors.length,
                     embeddingErrors: 0,
                     processingTimeMs: Date.now() - startTime,
                     contentIds: []
                 };
             }
             // Content-type detection and routing
-            const contentTypeStats = this.analyzeContentTypes(fileResult.documents);
+            const contentTypeStats = this.analyzeContentTypes(filteredResult.documents);
             console.log(`📊 Content analysis: ${contentTypeStats.text} text, ${contentTypeStats.image} image, ${contentTypeStats.other} other files`);
             // Phase 2: Document Chunking with Content-Type Awareness
             console.log('\n--- Phase 2: Document Chunking ---');
@@ -293,7 +318,7 @@ export class IngestionPipeline {
                 chunkSize: config.chunk_size,
                 chunkOverlap: config.chunk_overlap
             };
-            const chunkingResult = await this.chunkDocumentsWithContentTypes(fileResult.documents, effectiveChunkConfig, options.mode);
+            const chunkingResult = await this.chunkDocumentsWithContentTypes(filteredResult.documents, effectiveChunkConfig, options.mode);
             if (chunkingResult.totalChunks === 0) {
                 console.log('No chunks created from documents');
                 return {
@@ -318,10 +343,10 @@ export class IngestionPipeline {
             const endTime = Date.now();
             const processingTimeMs = endTime - startTime;
             const result = {
-                documentsProcessed: fileResult.documents.length,
+                documentsProcessed: filteredResult.documents.length,
                 chunksCreated: chunkingResult.totalChunks,
                 embeddingsGenerated: embeddingResult.embeddings.length,
-                documentErrors: fileResult.processingResult.errors.length,
+                documentErrors: filteredResult.processingResult.errors.length,
                 embeddingErrors: embeddingResult.errors,
                 processingTimeMs,
                 contentIds
@@ -447,7 +472,20 @@ export class IngestionPipeline {
                 try {
                     // Convert MIME type to simple content type for embedding function
                     const contentTypeForEmbedding = this.getContentTypeForEmbedding(chunk.contentType);
-                    const embedding = await this.embedFn(chunk.text, contentTypeForEmbedding);
+                    // For images, use the image path from metadata instead of text description
+                    let contentForEmbedding = chunk.text;
+                    if (contentTypeForEmbedding === 'image' && chunk.metadata) {
+                        // Try to get image path from metadata (originalPath or contentPath)
+                        const imagePath = chunk.metadata.originalPath || chunk.metadata.contentPath || chunk.metadata.source;
+                        if (imagePath) {
+                            contentForEmbedding = imagePath;
+                        }
+                        else {
+                            // Fallback: try to extract path from source if available
+                            console.warn(`Image chunk ${i + 1} missing image path in metadata, using text content as fallback`);
+                        }
+                    }
+                    const embedding = await this.embedFn(contentForEmbedding, contentTypeForEmbedding);
                     // Enhance embedding result with content type metadata if not already present
                     if (!embedding.contentType) {
                         embedding.contentType = contentTypeForEmbedding;
@@ -566,16 +604,35 @@ export class IngestionPipeline {
         return contentIds;
     }
     /**
-     * Update vector index with new embeddings
+     * Update vector index with new embeddings (supports grouped content type storage)
      */
     async updateVectorIndex(embeddings) {
+        console.log('updateVectorIndex called with', embeddings.length, 'embeddings');
         if (embeddings.length === 0) {
             console.log('No embeddings to add to vector index');
             return;
         }
         console.log(`Adding ${embeddings.length} vector${embeddings.length === 1 ? '' : 's'} to search index...`);
         try {
-            await this.indexManager.addVectors(embeddings);
+            // Group embeddings by content type for optimized storage
+            const groupedEmbeddings = embeddings.reduce((groups, embedding) => {
+                const contentType = embedding.contentType || 'text';
+                if (!groups[contentType]) {
+                    groups[contentType] = [];
+                }
+                groups[contentType].push(embedding);
+                return groups;
+            }, {});
+            const textEmbeddings = groupedEmbeddings.text || [];
+            const imageEmbeddings = groupedEmbeddings.image || [];
+            console.log(`Grouped: ${textEmbeddings.length} text, ${imageEmbeddings.length} image vectors`);
+            // Use grouped storage method if available, fallback to regular method
+            if (this.indexManager.addGroupedEmbeddings) {
+                await this.indexManager.addGroupedEmbeddings(textEmbeddings, imageEmbeddings);
+            }
+            else {
+                await this.indexManager.addVectors(embeddings);
+            }
             console.log(`✓ Vector index updated successfully with ${embeddings.length} new vectors`);
         }
         catch (error) {
@@ -583,26 +640,72 @@ export class IngestionPipeline {
             throw error;
         }
     }
+    /**
+     * Filter documents based on ingestion mode to avoid processing incompatible content types
+     */
+    filterDocumentsByMode(fileResult, mode) {
+        if (mode === 'multimodal') {
+            // In multimodal mode, keep all documents
+            return fileResult;
+        }
+        // In text mode, filter out image documents
+        const filteredDocuments = fileResult.documents.filter(doc => {
+            const contentType = doc.metadata?.contentType || 'text';
+            const isCompatible = contentType === 'text' ||
+                contentType.startsWith('text/') ||
+                contentType === 'application/pdf' ||
+                contentType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
+            if (!isCompatible) {
+                console.log(`⚠️ Skipping ${doc.source} (${contentType}) - not compatible with text mode`);
+            }
+            return isCompatible;
+        });
+        // Update processing result to reflect filtering
+        const filteredProcessingResult = {
+            ...fileResult.processingResult,
+            skippedFiles: [
+                ...(fileResult.processingResult.skippedFiles || []),
+                ...fileResult.documents
+                    .filter(doc => !filteredDocuments.includes(doc))
+                    .map(doc => ({
+                    path: doc.source,
+                    reason: `Content type not compatible with ${mode} mode`
+                }))
+            ]
+        };
+        return {
+            documents: filteredDocuments,
+            discoveryResult: fileResult.discoveryResult,
+            processingResult: filteredProcessingResult
+        };
+    }
     /**
      * Converts MIME type to simple content type for embedding function
      * @param mimeType - MIME type string (e.g., 'text/plain', 'image/jpeg')
      * @returns Simple content type ('text', 'image', etc.)
      */
-    getContentTypeForEmbedding(mimeType) {
-        if (!mimeType) {
+    getContentTypeForEmbedding(contentType) {
+        if (!contentType) {
+            return 'text';
+        }
+        // Handle simple content type strings (used by chunking)
+        if (contentType === 'image') {
+            return 'image';
+        }
+        else if (contentType === 'text') {
             return 'text';
         }
-        // Convert MIME types to simple content types
-        if (mimeType.startsWith('text/')) {
+        // Convert MIME types to simple content types (legacy support)
+        if (contentType.startsWith('text/')) {
             return 'text';
         }
-        else if (mimeType.startsWith('image/')) {
+        else if (contentType.startsWith('image/')) {
             return 'image';
         }
-        else if (mimeType === 'application/pdf') {
+        else if (contentType === 'application/pdf') {
             return 'text'; // PDFs are processed as text
         }
-        else if (mimeType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
+        else if (contentType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
             return 'text'; // DOCX files are processed as text
         }
         else {
@@ -671,6 +774,7 @@ export class IngestionPipeline {
                     contentType: 'image',
                     contentId: contentResult.contentId,
                     storageType: contentResult.storageType,
+                    contentPath: contentResult.contentPath, // Store contentPath for embedding
                     originalPath: metadata.originalPath,
                     ...imageMetadata // Spread all image metadata fields
                 }
@@ -687,6 +791,7 @@ export class IngestionPipeline {
                     contentType: 'image',
                     contentId: contentResult.contentId,
                     storageType: contentResult.storageType,
+                    contentPath: contentResult.contentPath, // Store contentPath for embedding
                     originalPath: metadata.originalPath,
                     processingError: error instanceof Error ? error.message : String(error)
                 }

package/dist/core/lazy-dependency-loader.d.ts CHANGED Viewed

@@ -59,15 +59,10 @@ export declare class LazyRerankerLoader {
      */
     static loadTextDerivedReranker(): Promise<RerankFunction>;
     /**
-     * Lazily load metadata-based reranker for multimodal mode
-     * Only imports when specifically needed
+     * Lazily load CLIP AutoProcessor for consistent image preprocessing
+     * Shares processor instances across embedder instances to ensure identical preprocessing
      */
-    static loadMetadataReranker(): Promise<RerankFunction>;
-    /**
-     * Lazily load hybrid reranker for multimodal mode
-     * Combines multiple reranking strategies (uses text-derived for now)
-     */
-    static loadHybridReranker(): Promise<RerankFunction>;
+    static loadCLIPAutoProcessor(modelName: string): Promise<any>;
     /**
      * Check if a reranker is already loaded in cache
      */

package/dist/core/lazy-dependency-loader.js CHANGED Viewed

@@ -198,32 +198,18 @@ export class LazyRerankerLoader {
         });
     }
     /**
-     * Lazily load metadata-based reranker for multimodal mode
-     * Only imports when specifically needed
+     * Lazily load CLIP AutoProcessor for consistent image preprocessing
+     * Shares processor instances across embedder instances to ensure identical preprocessing
      */
-    static async loadMetadataReranker() {
-        const cacheKey = 'reranker:metadata';
+    static async loadCLIPAutoProcessor(modelName) {
+        const cacheKey = `processor:clip:${modelName}`;
         return this.cache.getOrLoad(cacheKey, async () => {
-            console.log('🔄 Lazy loading metadata reranker (multimodal)');
-            // Dynamic import - only loaded when multimodal mode uses metadata reranking
-            const { MetadataRerankingStrategy } = await import('./reranking-strategies.js');
-            const reranker = new MetadataRerankingStrategy();
-            console.log('✅ Metadata reranker loaded');
-            return reranker.rerank.bind(reranker);
-        });
-    }
-    /**
-     * Lazily load hybrid reranker for multimodal mode
-     * Combines multiple reranking strategies (uses text-derived for now)
-     */
-    static async loadHybridReranker() {
-        const cacheKey = 'reranker:hybrid';
-        return this.cache.getOrLoad(cacheKey, async () => {
-            console.log('🔄 Lazy loading hybrid reranker (multimodal)');
-            // For now, hybrid reranking uses text-derived
-            // TODO: Implement proper hybrid reranking in future tasks
-            console.log('🔄 Hybrid reranking not yet implemented, using text-derived');
-            return this.loadTextDerivedReranker();
+            console.log(`🔄 Lazy loading CLIP AutoProcessor: ${modelName}`);
+            // Dynamic import - only loaded when CLIP models are used
+            const { AutoProcessor } = await import('@huggingface/transformers');
+            const processor = await AutoProcessor.from_pretrained(modelName);
+            console.log(`✅ CLIP AutoProcessor loaded: ${modelName}`);
+            return processor;
         });
     }
     /**
@@ -371,12 +357,8 @@ export class LazyDependencyManager {
                 return LazyRerankerLoader.loadTextReranker();
             case 'text-derived':
                 return LazyRerankerLoader.loadTextDerivedReranker();
-            case 'metadata':
-                return LazyRerankerLoader.loadMetadataReranker();
-            case 'hybrid':
-                return LazyRerankerLoader.loadHybridReranker();
             default:
-                throw new Error(`Unknown reranking strategy '${strategy}'. Supported strategies: cross-encoder, text-derived, metadata, hybrid, disabled`);
+                throw new Error(`Unknown reranking strategy '${strategy}'. Supported strategies: cross-encoder, text-derived, disabled`);
         }
     }
     /**

package/dist/core/mode-detection-service.js CHANGED Viewed

@@ -526,7 +526,7 @@ export class ModeDetectionService {
      * @private
      */
     validateRerankingStrategy(strategy) {
-        const validStrategies = ['cross-encoder', 'text-derived', 'metadata', 'hybrid', 'disabled'];
+        const validStrategies = ['cross-encoder', 'text-derived', 'disabled'];
         if (!validStrategies.includes(strategy)) {
             throw createError.validation(`Invalid reranking strategy '${strategy}'. Must be one of: ${validStrategies.join(', ')}`);
         }

package/dist/core/reranking-config.d.ts CHANGED Viewed

@@ -4,7 +4,7 @@
  * Provides straightforward configuration types and validation for different
  * reranking strategies without complex interface patterns.
  */
-export type RerankingStrategyType = 'cross-encoder' | 'text-derived' | 'metadata' | 'hybrid' | 'disabled';
+export type RerankingStrategyType = 'cross-encoder' | 'text-derived' | 'disabled';
 export interface RerankingConfig {
     strategy: RerankingStrategyType;
     model?: string;

package/dist/core/reranking-config.js CHANGED Viewed

@@ -17,15 +17,13 @@ export const DEFAULT_MULTIMODAL_RERANKING_CONFIG = {
         semantic: 0.7,
         metadata: 0.3
     },
-    fallback: 'metadata'
+    fallback: 'disabled'
 };
 // Strategy validation without complex interface patterns
 export function validateRerankingStrategy(strategy) {
     const validStrategies = [
         'cross-encoder',
         'text-derived',
-        'metadata',
-        'hybrid',
         'disabled'
     ];
     return validStrategies.includes(strategy);
@@ -36,7 +34,7 @@ export function validateRerankingConfig(config) {
         throw new Error('Reranking strategy is required');
     }
     if (!validateRerankingStrategy(config.strategy)) {
-        const validStrategies = ['cross-encoder', 'text-derived', 'metadata', 'hybrid', 'disabled'];
+        const validStrategies = ['cross-encoder', 'text-derived', 'disabled'];
         throw new Error(`Invalid reranking strategy '${config.strategy}'. ` +
             `Valid strategies: ${validStrategies.join(', ')}`);
     }
@@ -52,23 +50,16 @@ export function validateRerankingConfig(config) {
         if (visual !== undefined && (visual < 0 || visual > 1)) {
             throw new Error('Visual weight must be between 0 and 1');
         }
-        // Ensure weights sum to reasonable value for hybrid strategy
-        if (config.strategy === 'hybrid') {
-            const totalWeight = (semantic || 0) + (metadata || 0) + (visual || 0);
-            if (totalWeight === 0) {
-                throw new Error('Hybrid strategy requires at least one weight to be greater than 0');
-            }
-        }
     }
     // Validate fallback strategy if provided
     if (config.fallback && !validateRerankingStrategy(config.fallback)) {
-        const validStrategies = ['cross-encoder', 'text-derived', 'metadata', 'hybrid', 'disabled'];
+        const validStrategies = ['cross-encoder', 'text-derived', 'disabled'];
         throw new Error(`Invalid fallback strategy '${config.fallback}'. ` +
             `Valid strategies: ${validStrategies.join(', ')}`);
     }
     return {
         strategy: config.strategy,
-        enabled: config.enabled ?? true,
+        enabled: config.strategy === 'disabled' ? false : (config.enabled ?? true),
         model: config.model,
         weights: config.weights,
         fallback: config.fallback || 'disabled'
@@ -91,7 +82,7 @@ export function isStrategySupported(strategy, mode) {
         case 'text':
             return strategy === 'cross-encoder' || strategy === 'disabled';
         case 'multimodal':
-            return ['text-derived', 'metadata', 'hybrid', 'disabled'].includes(strategy);
+            return ['text-derived', 'disabled'].includes(strategy);
         default:
             return false;
     }
@@ -102,7 +93,7 @@ export function getSupportedStrategies(mode) {
         case 'text':
             return ['cross-encoder', 'disabled'];
         case 'multimodal':
-            return ['text-derived', 'metadata', 'hybrid', 'disabled'];
+            return ['text-derived', 'disabled'];
         default:
             return ['disabled'];
     }
@@ -145,7 +136,7 @@ export class RerankingConfigBuilder {
             .strategy('text-derived')
             .enabled(true)
             .weights({ semantic: 0.7, metadata: 0.3 })
-            .fallback('metadata');
+            .fallback('disabled');
     }
     static disabled() {
         return new RerankingConfigBuilder()

package/dist/core/reranking-factory.js CHANGED Viewed

@@ -6,7 +6,7 @@
  * principle of using simple functions over complex factory patterns.
  */
 import { getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, validateRerankingConfig } from './reranking-config.js';
-import { createCrossEncoderRerankFunction, createTextDerivedRerankFunction, createMetadataRerankFunction } from './reranking-strategies.js';
+import { createCrossEncoderRerankFunction, createTextDerivedRerankFunction } from './reranking-strategies.js';
 /**
  * Simple reranking creation function with conditional logic
  *
@@ -102,23 +102,6 @@ function createRerankingFunction(mode, strategy, config) {
                 undefined // Use default cross-encoder model
                 );
                 break;
-            case 'metadata':
-                console.log(`Creating metadata reranker for ${mode} mode`);
-                reranker = createMetadataRerankFunction({
-                    weights: config.weights ? {
-                        filename: config.weights.metadata || 0.4,
-                        contentType: 0.3,
-                        metadata: config.weights.metadata || 0.3
-                    } : undefined
-                });
-                break;
-            case 'hybrid':
-                if (mode !== 'multimodal') {
-                    throw new RerankingStrategyError(strategy, mode, 'Hybrid strategy only supported in multimodal mode', 'UNSUPPORTED_MODE');
-                }
-                console.log('Creating hybrid reranker for multimodal mode');
-                reranker = createHybridRerankFunction(config);
-                break;
             case 'disabled':
                 console.log('Reranking explicitly disabled');
                 return undefined;
@@ -241,172 +224,10 @@ function wrapRerankFunctionWithErrorRecovery(reranker, strategy, mode) {
     };
 }
 /**
- * Create hybrid reranking function that combines multiple strategies with enhanced error recovery
+ * Hybrid reranking strategy removed in Phase 3 - throwing error for backward compatibility
  */
 function createHybridRerankFunction(config) {
-    // Default weights if not specified
-    const weights = config.weights || {
-        semantic: 0.6,
-        metadata: 0.4,
-        visual: 0.0 // Not implemented yet
-    };
-    // Track which strategies are available
-    const availableStrategies = {};
-    // Initialize strategies with error handling
-    try {
-        if (weights.semantic && weights.semantic > 0) {
-            availableStrategies.textDerived = createTextDerivedRerankFunction();
-            console.log('✅ Text-derived strategy initialized for hybrid reranking');
-        }
-    }
-    catch (error) {
-        console.warn(`⚠️ Text-derived strategy initialization failed for hybrid reranking: ${error instanceof Error ? error.message : 'Unknown error'}`);
-    }
-    try {
-        if (weights.metadata && weights.metadata > 0) {
-            availableStrategies.metadata = createMetadataRerankFunction();
-            console.log('✅ Metadata strategy initialized for hybrid reranking');
-        }
-    }
-    catch (error) {
-        console.warn(`⚠️ Metadata strategy initialization failed for hybrid reranking: ${error instanceof Error ? error.message : 'Unknown error'}`);
-    }
-    // Check if any strategies are available
-    const hasAvailableStrategies = Object.keys(availableStrategies).length > 0;
-    if (!hasAvailableStrategies) {
-        throw new RerankingStrategyError('hybrid', 'multimodal', 'No hybrid reranking strategies could be initialized', 'NO_STRATEGIES_AVAILABLE');
-    }
-    console.log(`Hybrid reranking initialized with ${Object.keys(availableStrategies).length} available strategies`);
-    return async (query, results, contentType) => {
-        const startTime = Date.now();
-        const strategyResults = {};
-        try {
-            console.log(`🔄 Running hybrid reranking with ${Object.keys(availableStrategies).length} strategies`);
-            // Start with original results
-            let hybridResults = [...results];
-            let successfulStrategies = 0;
-            // Apply text-derived reranking if available and enabled
-            if (availableStrategies.textDerived && weights.semantic && weights.semantic > 0) {
-                const strategyStartTime = Date.now();
-                try {
-                    console.log(`🔧 Applying text-derived reranking (weight: ${weights.semantic})`);
-                    const textDerivedResults = await availableStrategies.textDerived(query, hybridResults, contentType);
-                    // Combine scores with semantic weight
-                    hybridResults = hybridResults.map((result, index) => {
-                        const textDerivedScore = textDerivedResults[index]?.score || result.score;
-                        const combinedScore = result.score * (1 - weights.semantic) + textDerivedScore * weights.semantic;
-                        return {
-                            ...result,
-                            score: combinedScore,
-                            metadata: {
-                                ...result.metadata,
-                                hybridScores: {
-                                    ...(result.metadata?.hybridScores || {}),
-                                    textDerived: textDerivedScore,
-                                    semantic: combinedScore
-                                }
-                            }
-                        };
-                    });
-                    const strategyDuration = Date.now() - strategyStartTime;
-                    strategyResults.textDerived = { success: true, duration: strategyDuration };
-                    successfulStrategies++;
-                    console.log(`✅ Text-derived reranking completed (${strategyDuration}ms)`);
-                }
-                catch (error) {
-                    const strategyDuration = Date.now() - strategyStartTime;
-                    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
-                    strategyResults.textDerived = { success: false, error: errorMessage, duration: strategyDuration };
-                    console.warn(`❌ Text-derived reranking failed in hybrid mode (${strategyDuration}ms): ${errorMessage}`);
-                }
-            }
-            // Apply metadata reranking if available and enabled
-            if (availableStrategies.metadata && weights.metadata && weights.metadata > 0) {
-                const strategyStartTime = Date.now();
-                try {
-                    console.log(`🔧 Applying metadata reranking (weight: ${weights.metadata})`);
-                    const metadataResults = await availableStrategies.metadata(query, hybridResults, contentType);
-                    // Combine scores with metadata weight
-                    hybridResults = hybridResults.map((result, index) => {
-                        const metadataScore = metadataResults[index]?.score || result.score;
-                        const currentScore = result.score;
-                        const combinedScore = currentScore * (1 - weights.metadata) + metadataScore * weights.metadata;
-                        return {
-                            ...result,
-                            score: combinedScore,
-                            metadata: {
-                                ...result.metadata,
-                                hybridScores: {
-                                    ...(result.metadata?.hybridScores || {}),
-                                    metadata: metadataScore,
-                                    combined: combinedScore
-                                }
-                            }
-                        };
-                    });
-                    const strategyDuration = Date.now() - strategyStartTime;
-                    strategyResults.metadata = { success: true, duration: strategyDuration };
-                    successfulStrategies++;
-                    console.log(`✅ Metadata reranking completed (${strategyDuration}ms)`);
-                }
-                catch (error) {
-                    const strategyDuration = Date.now() - strategyStartTime;
-                    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
-                    strategyResults.metadata = { success: false, error: errorMessage, duration: strategyDuration };
-                    console.warn(`❌ Metadata reranking failed in hybrid mode (${strategyDuration}ms): ${errorMessage}`);
-                }
-            }
-            // Sort by final combined scores
-            hybridResults.sort((a, b) => b.score - a.score);
-            const totalDuration = Date.now() - startTime;
-            // Add hybrid reranking metadata to results
-            hybridResults = hybridResults.map(result => ({
-                ...result,
-                metadata: {
-                    ...result.metadata,
-                    hybridRerankingInfo: {
-                        totalDuration,
-                        successfulStrategies,
-                        strategyResults,
-                        weights
-                    }
-                }
-            }));
-            if (successfulStrategies > 0) {
-                console.log(`✅ Hybrid reranking completed successfully (${totalDuration}ms, ${successfulStrategies}/${Object.keys(availableStrategies).length} strategies succeeded)`);
-            }
-            else {
-                console.warn(`⚠️ Hybrid reranking completed with no successful strategies (${totalDuration}ms), returning original results`);
-                return results; // Return original results if no strategies succeeded
-            }
-            return hybridResults;
-        }
-        catch (error) {
-            const totalDuration = Date.now() - startTime;
-            const errorMessage = error instanceof Error ? error.message : 'Unknown error';
-            console.warn(`❌ Hybrid reranking failed (${totalDuration}ms): ${errorMessage}. ` +
-                `Returning original results.`);
-            // Log detailed error information
-            console.error('Hybrid reranking error details:', {
-                query: query.substring(0, 100) + (query.length > 100 ? '...' : ''),
-                resultCount: results.length,
-                contentType,
-                availableStrategies: Object.keys(availableStrategies),
-                weights,
-                strategyResults,
-                error: errorMessage
-            });
-            return results.map(result => ({
-                ...result,
-                metadata: {
-                    ...result.metadata,
-                    hybridRerankingFailed: true,
-                    hybridRerankingError: errorMessage,
-                    fallbackToVectorSimilarity: true
-                }
-            }));
-        }
-    };
+    throw new RerankingStrategyError('hybrid', 'multimodal', 'Hybrid reranking strategy has been removed in this version. Use text-derived instead.', 'STRATEGY_REMOVED');
 }
 /**
  * Create reranker with automatic mode detection
@@ -582,8 +403,6 @@ export function getRerankingStats() {
         strategiesUsed: {
             'cross-encoder': 0,
             'text-derived': 0,
-            'metadata': 0,
-            'hybrid': 0,
             'disabled': 0
         }
     };

package/dist/core/search.d.ts CHANGED Viewed

@@ -80,6 +80,16 @@ export declare class SearchEngine {
      * @returns Promise resolving to array of search results
      */
     search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
+    /**
+     * Perform semantic search using a pre-computed embedding vector
+     * Useful for image-based search or when embedding is computed externally
+     * @param queryVector - Pre-computed query embedding vector
+     * @param options - Search options including top_k and rerank settings
+     * @param originalQuery - Optional original query for reranking (text or image path)
+     * @param embeddingTime - Optional embedding time for logging
+     * @returns Promise resolving to array of search results
+     */
+    searchWithVector(queryVector: Float32Array, options?: SearchOptions, originalQuery?: string, embeddingTime?: number): Promise<SearchResult[]>;
     /**
      * Format search results with proper structure
      * @param chunks - Database chunks with metadata