npm - rag-lite-ts - Versions diffs - 1.0.2 → 2.0.0 - Mend

rag-lite-ts 1.0.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (202)

package/README.md +606 -93
package/dist/cli/indexer.js +192 -4
package/dist/cli/search.js +50 -11
package/dist/cli.js +183 -26
package/dist/core/abstract-embedder.d.ts +125 -0
package/dist/core/abstract-embedder.js +264 -0
package/dist/core/actionable-error-messages.d.ts +60 -0
package/dist/core/actionable-error-messages.js +397 -0
package/dist/core/batch-processing-optimizer.d.ts +155 -0
package/dist/core/batch-processing-optimizer.js +541 -0
package/dist/core/chunker.d.ts +2 -0
package/dist/core/cli-database-utils.d.ts +53 -0
package/dist/core/cli-database-utils.js +239 -0
package/dist/core/config.js +10 -3
package/dist/core/content-errors.d.ts +111 -0
package/dist/core/content-errors.js +362 -0
package/dist/core/content-manager.d.ts +343 -0
package/dist/core/content-manager.js +1504 -0
package/dist/core/content-performance-optimizer.d.ts +150 -0
package/dist/core/content-performance-optimizer.js +516 -0
package/dist/core/content-resolver.d.ts +104 -0
package/dist/core/content-resolver.js +285 -0
package/dist/core/cross-modal-search.d.ts +164 -0
package/dist/core/cross-modal-search.js +342 -0
package/dist/core/database-connection-manager.d.ts +109 -0
package/dist/core/database-connection-manager.js +304 -0
package/dist/core/db.d.ts +141 -2
package/dist/core/db.js +631 -89
package/dist/core/embedder-factory.d.ts +176 -0
package/dist/core/embedder-factory.js +338 -0
package/dist/core/index.d.ts +3 -1
package/dist/core/index.js +4 -1
package/dist/core/ingestion.d.ts +85 -15
package/dist/core/ingestion.js +510 -45
package/dist/core/lazy-dependency-loader.d.ts +152 -0
package/dist/core/lazy-dependency-loader.js +453 -0
package/dist/core/mode-detection-service.d.ts +150 -0
package/dist/core/mode-detection-service.js +565 -0
package/dist/core/mode-model-validator.d.ts +92 -0
package/dist/core/mode-model-validator.js +203 -0
package/dist/core/model-registry.d.ts +120 -0
package/dist/core/model-registry.js +415 -0
package/dist/core/model-validator.d.ts +217 -0
package/dist/core/model-validator.js +782 -0
package/dist/core/polymorphic-search-factory.d.ts +154 -0
package/dist/core/polymorphic-search-factory.js +344 -0
package/dist/core/raglite-paths.d.ts +121 -0
package/dist/core/raglite-paths.js +145 -0
package/dist/core/reranking-config.d.ts +42 -0
package/dist/core/reranking-config.js +156 -0
package/dist/core/reranking-factory.d.ts +92 -0
package/dist/core/reranking-factory.js +591 -0
package/dist/core/reranking-strategies.d.ts +325 -0
package/dist/core/reranking-strategies.js +720 -0
package/dist/core/resource-cleanup.d.ts +163 -0
package/dist/core/resource-cleanup.js +371 -0
package/dist/core/resource-manager.d.ts +212 -0
package/dist/core/resource-manager.js +564 -0
package/dist/core/search.d.ts +28 -1
package/dist/core/search.js +83 -5
package/dist/core/streaming-operations.d.ts +145 -0
package/dist/core/streaming-operations.js +409 -0
package/dist/core/types.d.ts +3 -0
package/dist/core/universal-embedder.d.ts +177 -0
package/dist/core/universal-embedder.js +139 -0
package/dist/core/validation-messages.d.ts +99 -0
package/dist/core/validation-messages.js +334 -0
package/dist/core/vector-index.js +7 -8
package/dist/factories/index.d.ts +1 -1
package/dist/factories/text-factory.d.ts +128 -34
package/dist/factories/text-factory.js +346 -97
package/dist/file-processor.d.ts +88 -2
package/dist/file-processor.js +720 -17
package/dist/index.d.ts +9 -0
package/dist/index.js +11 -0
package/dist/ingestion.d.ts +16 -0
package/dist/ingestion.js +21 -0
package/dist/mcp-server.d.ts +35 -3
package/dist/mcp-server.js +1107 -31
package/dist/multimodal/clip-embedder.d.ts +314 -0
package/dist/multimodal/clip-embedder.js +945 -0
package/dist/multimodal/index.d.ts +6 -0
package/dist/multimodal/index.js +6 -0
package/dist/run-error-recovery-tests.d.ts +7 -0
package/dist/run-error-recovery-tests.js +101 -0
package/dist/search.d.ts +26 -0
package/dist/search.js +54 -1
package/dist/test-utils.d.ts +8 -26
package/dist/text/chunker.d.ts +1 -0
package/dist/text/embedder.js +15 -8
package/dist/text/index.d.ts +1 -0
package/dist/text/index.js +1 -0
package/dist/text/reranker.d.ts +1 -2
package/dist/text/reranker.js +17 -47
package/dist/text/sentence-transformer-embedder.d.ts +96 -0
package/dist/text/sentence-transformer-embedder.js +340 -0
package/dist/types.d.ts +39 -0
package/dist/utils/vector-math.d.ts +31 -0
package/dist/utils/vector-math.js +70 -0
package/package.json +15 -3
package/dist/api-errors.d.ts.map +0 -1
package/dist/api-errors.js.map +0 -1
package/dist/cli/indexer.d.ts.map +0 -1
package/dist/cli/indexer.js.map +0 -1
package/dist/cli/search.d.ts.map +0 -1
package/dist/cli/search.js.map +0 -1
package/dist/cli.d.ts.map +0 -1
package/dist/cli.js.map +0 -1
package/dist/config.d.ts.map +0 -1
package/dist/config.js.map +0 -1
package/dist/core/adapters.d.ts.map +0 -1
package/dist/core/adapters.js.map +0 -1
package/dist/core/chunker.d.ts.map +0 -1
package/dist/core/chunker.js.map +0 -1
package/dist/core/config.d.ts.map +0 -1
package/dist/core/config.js.map +0 -1
package/dist/core/db.d.ts.map +0 -1
package/dist/core/db.js.map +0 -1
package/dist/core/error-handler.d.ts.map +0 -1
package/dist/core/error-handler.js.map +0 -1
package/dist/core/index.d.ts.map +0 -1
package/dist/core/index.js.map +0 -1
package/dist/core/ingestion.d.ts.map +0 -1
package/dist/core/ingestion.js.map +0 -1
package/dist/core/interfaces.d.ts.map +0 -1
package/dist/core/interfaces.js.map +0 -1
package/dist/core/path-manager.d.ts.map +0 -1
package/dist/core/path-manager.js.map +0 -1
package/dist/core/search-example.d.ts +0 -25
package/dist/core/search-example.d.ts.map +0 -1
package/dist/core/search-example.js +0 -138
package/dist/core/search-example.js.map +0 -1
package/dist/core/search-pipeline-example.d.ts +0 -21
package/dist/core/search-pipeline-example.d.ts.map +0 -1
package/dist/core/search-pipeline-example.js +0 -188
package/dist/core/search-pipeline-example.js.map +0 -1
package/dist/core/search-pipeline.d.ts.map +0 -1
package/dist/core/search-pipeline.js.map +0 -1
package/dist/core/search.d.ts.map +0 -1
package/dist/core/search.js.map +0 -1
package/dist/core/types.d.ts.map +0 -1
package/dist/core/types.js.map +0 -1
package/dist/core/vector-index.d.ts.map +0 -1
package/dist/core/vector-index.js.map +0 -1
package/dist/dom-polyfills.d.ts.map +0 -1
package/dist/dom-polyfills.js.map +0 -1
package/dist/examples/clean-api-examples.d.ts +0 -44
package/dist/examples/clean-api-examples.d.ts.map +0 -1
package/dist/examples/clean-api-examples.js +0 -206
package/dist/examples/clean-api-examples.js.map +0 -1
package/dist/factories/index.d.ts.map +0 -1
package/dist/factories/index.js.map +0 -1
package/dist/factories/text-factory.d.ts.map +0 -1
package/dist/factories/text-factory.js.map +0 -1
package/dist/file-processor.d.ts.map +0 -1
package/dist/file-processor.js.map +0 -1
package/dist/index-manager.d.ts.map +0 -1
package/dist/index-manager.js.map +0 -1
package/dist/index.d.ts.map +0 -1
package/dist/index.js.map +0 -1
package/dist/indexer.d.ts.map +0 -1
package/dist/indexer.js.map +0 -1
package/dist/ingestion.d.ts.map +0 -1
package/dist/ingestion.js.map +0 -1
package/dist/mcp-server.d.ts.map +0 -1
package/dist/mcp-server.js.map +0 -1
package/dist/preprocess.d.ts.map +0 -1
package/dist/preprocess.js.map +0 -1
package/dist/preprocessors/index.d.ts.map +0 -1
package/dist/preprocessors/index.js.map +0 -1
package/dist/preprocessors/mdx.d.ts.map +0 -1
package/dist/preprocessors/mdx.js.map +0 -1
package/dist/preprocessors/mermaid.d.ts.map +0 -1
package/dist/preprocessors/mermaid.js.map +0 -1
package/dist/preprocessors/registry.d.ts.map +0 -1
package/dist/preprocessors/registry.js.map +0 -1
package/dist/search-standalone.d.ts.map +0 -1
package/dist/search-standalone.js.map +0 -1
package/dist/search.d.ts.map +0 -1
package/dist/search.js.map +0 -1
package/dist/test-utils.d.ts.map +0 -1
package/dist/test-utils.js.map +0 -1
package/dist/text/chunker.d.ts.map +0 -1
package/dist/text/chunker.js.map +0 -1
package/dist/text/embedder.d.ts.map +0 -1
package/dist/text/embedder.js.map +0 -1
package/dist/text/index.d.ts.map +0 -1
package/dist/text/index.js.map +0 -1
package/dist/text/preprocessors/index.d.ts.map +0 -1
package/dist/text/preprocessors/index.js.map +0 -1
package/dist/text/preprocessors/mdx.d.ts.map +0 -1
package/dist/text/preprocessors/mdx.js.map +0 -1
package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
package/dist/text/preprocessors/mermaid.js.map +0 -1
package/dist/text/preprocessors/registry.d.ts.map +0 -1
package/dist/text/preprocessors/registry.js.map +0 -1
package/dist/text/reranker.d.ts.map +0 -1
package/dist/text/reranker.js.map +0 -1
package/dist/text/tokenizer.d.ts.map +0 -1
package/dist/text/tokenizer.js.map +0 -1
package/dist/types.d.ts.map +0 -1
package/dist/types.js.map +0 -1

package/dist/core/validation-messages.js ADDED

@@ -0,0 +1,334 @@
+/**
+ * CORE MODULE — Validation Messages and Error Descriptions
+ * Comprehensive error messages and user guidance for model validation
+ * Provides helpful, actionable error messages with troubleshooting steps
+ */
+import { ModelRegistry } from './model-registry.js';
+// =============================================================================
+// ERROR MESSAGE TEMPLATES
+// =============================================================================
+/**
+ * Error message templates for different validation scenarios
+ */
+export const ERROR_MESSAGES = {
+    MODEL_NOT_FOUND: (modelName, suggestions) => ({
+        title: `Model '${modelName}' not found`,
+        description: `The specified model is not supported by the Chameleon architecture.`,
+        details: [
+            `Model '${modelName}' is not in the supported models registry.`,
+            `This could be due to a typo in the model name or the model not being compatible with transformers.js.`
+        ],
+        suggestions: suggestions.length > 0 ? [
+            `Did you mean one of these models?`,
+            ...suggestions.map(s => `  • ${s}`)
+        ] : [
+            `Available models:`,
+            ...ModelRegistry.getSupportedModels().map(s => `  • ${s}`)
+        ],
+        actions: [
+            `Check the model name for typos`,
+            `Use 'ModelRegistry.getSupportedModels()' to see all available models`,
+            `Visit the documentation for the latest supported models list`
+        ]
+    }),
+    TRANSFORMERS_VERSION_INCOMPATIBLE: (modelName, required, current) => ({
+        title: `Transformers.js version incompatible`,
+        description: `Model '${modelName}' requires a newer version of transformers.js.`,
+        details: [
+            `Required version: ${required}`,
+            `Current version: ${current}`,
+            `The model uses features not available in the current transformers.js version.`
+        ],
+        suggestions: [
+            `Upgrade transformers.js to the latest version:`,
+            `  npm install @huggingface/transformers@latest`,
+            ``,
+            `Or install a specific compatible version:`,
+            `  npm install @huggingface/transformers@${required.replace(/[>=<~^]/g, '')}`
+        ],
+        actions: [
+            `Update your package.json dependencies`,
+            `Run npm install to update transformers.js`,
+            `Restart your application after updating`
+        ]
+    }),
+    INSUFFICIENT_MEMORY: (modelName, required, available) => ({
+        title: `Insufficient memory for model`,
+        description: `Model '${modelName}' requires more memory than available.`,
+        details: [
+            `Required memory: ${required}MB`,
+            `Available memory: ${available}MB`,
+            `Shortfall: ${required - available}MB`
+        ],
+        suggestions: [
+            `Consider using a smaller model variant:`,
+            ...ModelRegistry.getSupportedModels().filter(name => {
+                const info = ModelRegistry.getModelInfo(name);
+                return info &&
+                    info.requirements.minimumMemory &&
+                    info.requirements.minimumMemory <= available;
+            }).map(name => `  • ${name}`),
+            ``,
+            `Or increase available memory by:`,
+            `  • Closing other applications`,
+            `  • Increasing Node.js memory limit: --max-old-space-size=${required + 512}`,
+            `  • Using a machine with more RAM`
+        ],
+        actions: [
+            `Free up system memory`,
+            `Choose a more memory-efficient model`,
+            `Consider using model quantization if available`
+        ]
+    }),
+    PLATFORM_UNSUPPORTED: (modelName, currentPlatform, supportedPlatforms) => ({
+        title: `Platform not supported`,
+        description: `Model '${modelName}' is not supported on ${currentPlatform}.`,
+        details: [
+            `Current platform: ${currentPlatform}`,
+            `Supported platforms: ${supportedPlatforms.join(', ')}`,
+            `The model may use platform-specific features or optimizations.`
+        ],
+        suggestions: [
+            `Try running on a supported platform:`,
+            ...supportedPlatforms.map(platform => `  • ${platform}`),
+            ``,
+            `Or use a platform-agnostic model:`,
+            ...ModelRegistry.getSupportedModels().filter(name => {
+                const info = ModelRegistry.getModelInfo(name);
+                return info &&
+                    info.requirements.platformSupport &&
+                    info.requirements.platformSupport.includes(currentPlatform);
+            }).slice(0, 3).map(name => `  • ${name}`)
+        ],
+        actions: [
+            `Switch to a supported platform`,
+            `Use a different model that supports your platform`,
+            `Check if there are platform-specific installation instructions`
+        ]
+    }),
+    FEATURES_MISSING: (modelName, missingFeatures) => ({
+        title: `Required features not available`,
+        description: `Model '${modelName}' requires features not available in current transformers.js version.`,
+        details: [
+            `Missing features: ${missingFeatures.join(', ')}`,
+            `These features are required for the model to function properly.`
+        ],
+        suggestions: [
+            `Upgrade transformers.js to get missing features:`,
+            `  npm install @huggingface/transformers@latest`,
+            ``,
+            `Or use a model that doesn't require these features:`,
+            ...ModelRegistry.getSupportedModels().filter(name => {
+                const info = ModelRegistry.getModelInfo(name);
+                return info &&
+                    (!info.requirements.requiredFeatures ||
+                        info.requirements.requiredFeatures.every(f => !missingFeatures.includes(f)));
+            }).slice(0, 3).map(name => `  • ${name}`)
+        ],
+        actions: [
+            `Update transformers.js to the latest version`,
+            `Check the transformers.js changelog for feature availability`,
+            `Use an alternative model with fewer feature requirements`
+        ]
+    }),
+    CONTENT_TYPE_UNSUPPORTED: (contentType, modelName, supportedTypes) => ({
+        title: `Content type not supported`,
+        description: `Model '${modelName}' does not support '${contentType}' content.`,
+        details: [
+            `Requested content type: ${contentType}`,
+            `Supported content types: ${supportedTypes.join(', ')}`,
+            `The model was not trained to handle this type of content.`
+        ],
+        suggestions: [
+            `Use a model that supports '${contentType}' content:`,
+            ...ModelRegistry.getModelsByContentType(contentType).slice(0, 3).map(name => `  • ${name}`),
+            ``,
+            `Or convert your content to a supported type:`,
+            ...supportedTypes.map(type => `  • Convert to ${type}`)
+        ],
+        actions: [
+            `Choose a multimodal model for mixed content types`,
+            `Preprocess your content to match supported types`,
+            `Use separate models for different content types`
+        ]
+    })
+};
+// =============================================================================
+// WARNING MESSAGE TEMPLATES
+// =============================================================================
+/**
+ * Warning message templates for non-critical issues
+ */
+export const WARNING_MESSAGES = {
+    HIGH_MEMORY_USAGE: (modelName, memoryMB) => ({
+        title: `High memory usage`,
+        message: `Model '${modelName}' requires ${memoryMB}MB of memory, which may impact performance.`,
+        suggestions: [
+            `Monitor system memory usage during operation`,
+            `Consider using a smaller model variant if performance is affected`,
+            `Ensure sufficient swap space is available`
+        ]
+    }),
+    LIMITED_BATCH_SIZE: (modelName, maxBatchSize) => ({
+        title: `Limited batch processing`,
+        message: `Model '${modelName}' supports maximum batch size of ${maxBatchSize}.`,
+        suggestions: [
+            `Use smaller batch sizes for optimal performance`,
+            `Process large datasets in smaller chunks`,
+            `Consider parallel processing with multiple model instances`
+        ]
+    }),
+    EXPERIMENTAL_FEATURES: (modelName, features) => ({
+        title: `Experimental features in use`,
+        message: `Model '${modelName}' uses experimental features: ${features.join(', ')}.`,
+        suggestions: [
+            `Test thoroughly before using in production`,
+            `Monitor for unexpected behavior or errors`,
+            `Have fallback options ready`,
+            `Check for updates that may stabilize these features`
+        ]
+    }),
+    PERFORMANCE_IMPACT: (modelName, reason) => ({
+        title: `Potential performance impact`,
+        message: `Model '${modelName}' may have reduced performance: ${reason}.`,
+        suggestions: [
+            `Monitor processing times and resource usage`,
+            `Consider using GPU acceleration if available`,
+            `Optimize batch sizes for your use case`,
+            `Profile your application to identify bottlenecks`
+        ]
+    })
+};
+// =============================================================================
+// MESSAGE FORMATTING UTILITIES
+// =============================================================================
+/**
+ * Format an error message for console output
+ */
+export function formatErrorMessage(error) {
+    const lines = [];
+    lines.push(`❌ ${error.title}`);
+    lines.push('');
+    lines.push(error.description);
+    if (error.details.length > 0) {
+        lines.push('');
+        lines.push('Details:');
+        error.details.forEach(detail => lines.push(`  ${detail}`));
+    }
+    if (error.suggestions.length > 0) {
+        lines.push('');
+        lines.push('Suggestions:');
+        error.suggestions.forEach(suggestion => lines.push(`  ${suggestion}`));
+    }
+    if (error.actions.length > 0) {
+        lines.push('');
+        lines.push('Actions:');
+        error.actions.forEach((action, index) => lines.push(`  ${index + 1}. ${action}`));
+    }
+    return lines.join('\n');
+}
+/**
+ * Format a warning message for console output
+ */
+export function formatWarningMessage(warning) {
+    const lines = [];
+    lines.push(`⚠️  ${warning.title}`);
+    lines.push('');
+    lines.push(warning.message);
+    if (warning.suggestions.length > 0) {
+        lines.push('');
+        lines.push('Suggestions:');
+        warning.suggestions.forEach(suggestion => lines.push(`  • ${suggestion}`));
+    }
+    return lines.join('\n');
+}
+/**
+ * Create a comprehensive error message for model validation failure
+ */
+export function createValidationErrorMessage(modelName, errorType, context = {}) {
+    switch (errorType) {
+        case 'not_found':
+            return formatErrorMessage(ERROR_MESSAGES.MODEL_NOT_FOUND(modelName, context.suggestions || []));
+        case 'version_incompatible':
+            return formatErrorMessage(ERROR_MESSAGES.TRANSFORMERS_VERSION_INCOMPATIBLE(modelName, context.required || 'unknown', context.current || 'unknown'));
+        case 'insufficient_memory':
+            return formatErrorMessage(ERROR_MESSAGES.INSUFFICIENT_MEMORY(modelName, context.required || 0, context.available || 0));
+        case 'platform_unsupported':
+            return formatErrorMessage(ERROR_MESSAGES.PLATFORM_UNSUPPORTED(modelName, context.currentPlatform || 'unknown', context.supportedPlatforms || []));
+        case 'features_missing':
+            return formatErrorMessage(ERROR_MESSAGES.FEATURES_MISSING(modelName, context.missingFeatures || []));
+        case 'content_type_unsupported':
+            return formatErrorMessage(ERROR_MESSAGES.CONTENT_TYPE_UNSUPPORTED(context.contentType || 'unknown', modelName, context.supportedTypes || []));
+        default:
+            return `❌ Validation failed for model '${modelName}': ${errorType}`;
+    }
+}
+/**
+ * Create helpful suggestions based on model type and use case
+ */
+export function createModelSuggestions(modelType, contentTypes, memoryLimit) {
+    const suggestions = [];
+    if (modelType === 'sentence-transformer') {
+        suggestions.push('For text-only tasks, sentence-transformers are most efficient');
+        suggestions.push('all-MiniLM-L6-v2 offers the best balance of speed and accuracy');
+        suggestions.push('all-mpnet-base-v2 provides higher accuracy but uses more memory');
+    }
+    if (modelType === 'clip') {
+        suggestions.push('CLIP models support both text and image content');
+        suggestions.push('clip-vit-base-patch32 is recommended for most use cases');
+        suggestions.push('patch16 variants are more accurate but slower');
+    }
+    if (contentTypes?.includes('image')) {
+        suggestions.push('Multimodal content requires CLIP models');
+        suggestions.push('Ensure images are in supported formats (jpg, png, webp)');
+        suggestions.push('Consider image preprocessing for better results');
+    }
+    if (memoryLimit && memoryLimit < 1024) {
+        suggestions.push('Low memory environments should use smaller models');
+        suggestions.push('Consider model quantization to reduce memory usage');
+        suggestions.push('Process content in smaller batches');
+    }
+    return suggestions;
+}
+/**
+ * Get troubleshooting steps for common issues
+ */
+export function getTroubleshootingSteps(issue) {
+    const steps = {
+        'model_loading_failed': [
+            'Check internet connection for model download',
+            'Verify model name spelling and availability',
+            'Ensure sufficient disk space for model cache',
+            'Try clearing the model cache and re-downloading',
+            'Check transformers.js version compatibility'
+        ],
+        'out_of_memory': [
+            'Reduce batch size for processing',
+            'Use a smaller model variant',
+            'Increase Node.js memory limit with --max-old-space-size',
+            'Close other memory-intensive applications',
+            'Consider using model quantization'
+        ],
+        'slow_performance': [
+            'Use GPU acceleration if available',
+            'Optimize batch sizes for your hardware',
+            'Consider using a smaller, faster model',
+            'Profile your code to identify bottlenecks',
+            'Use appropriate hardware for your model size'
+        ],
+        'compatibility_issues': [
+            'Update transformers.js to the latest version',
+            'Check model requirements against your environment',
+            'Verify platform compatibility (Node.js vs browser)',
+            'Test with a known working model first',
+            'Check for conflicting dependencies'
+        ]
+    };
+    return steps[issue] || [
+        'Check the documentation for your specific issue',
+        'Search for similar issues in the project repository',
+        'Ensure all dependencies are up to date',
+        'Try with a minimal test case to isolate the problem'
+    ];
+}
+//# sourceMappingURL=validation-messages.js.map

package/dist/core/vector-index.js CHANGED

@@ -5,6 +5,7 @@
 import { readFileSync, writeFileSync, existsSync } from 'fs';
 import { JSDOM } from 'jsdom';
 import { ErrorCategory, ErrorSeverity, safeExecute } from './error-handler.js';
+import { createMissingFileError, createDimensionMismatchError } from './actionable-error-messages.js';
 // Set up browser-like environment for hnswlib-wasm
 if (typeof window === 'undefined') {
     const dom = new JSDOM('<!DOCTYPE html><html><body></body></html>', {
@@ -103,7 +104,9 @@ export class VectorIndex {
      */
     async loadIndex() {
         if (!existsSync(this.indexPath)) {
-            throw new Error(`Index file not found: ${this.indexPath}`);
+            throw createMissingFileError(this.indexPath, 'index', {
+                operationContext: 'VectorIndex.loadIndex'
+            });
         }
         try {
             // Load the hnswlib module
@@ -162,11 +165,7 @@ export class VectorIndex {
                 if (stored.vectors && stored.vectors.length > 0) {
                     console.log(`   Actual vector length: ${stored.vectors[0].vector.length}`);
                 }
-                throw new Error(`Vector dimension mismatch!\n` +
-                    `Index was created with ${stored.dimensions} dimensions\n` +
-                    `Current model expects ${this.options.dimensions} dimensions\n` +
-                    `This indicates the embedding model has changed.\n` +
-                    `Please rebuild the index with the current model.`);
+                throw createDimensionMismatchError(this.options.dimensions, stored.dimensions, 'vector index loading', { operationContext: 'VectorIndex.loadIndex' });
             }
             // Update options from stored data
             this.options.maxElements = stored.maxElements || this.options.maxElements;
@@ -227,7 +226,7 @@ export class VectorIndex {
             throw new Error('Index not initialized');
         }
         if (vector.length !== this.options.dimensions) {
-            throw new Error(`Vector dimension mismatch: expected ${this.options.dimensions}, got ${vector.length}`);
+            throw createDimensionMismatchError(this.options.dimensions, vector.length, 'vector addition', { operationContext: 'VectorIndex.addVector' });
         }
         try {
             this.index.addPoint(vector, embeddingId, false);
@@ -255,7 +254,7 @@ export class VectorIndex {
             throw new Error('Index not initialized');
         }
         if (queryVector.length !== this.options.dimensions) {
-            throw new Error(`Query vector dimension mismatch: expected ${this.options.dimensions}, got ${queryVector.length}`);
+            throw createDimensionMismatchError(this.options.dimensions, queryVector.length, 'vector search', { operationContext: 'VectorIndex.search' });
         }
         if (this.currentSize === 0) {
             return { neighbors: [], distances: [] };

package/dist/factories/index.d.ts CHANGED

@@ -36,7 +36,7 @@
  * ```
  */
 export { TextSearchFactory, TextIngestionFactory, TextRAGFactory, TextFactoryHelpers } from './text-factory.js';
-export type { TextSearchOptions, TextIngestionOptions } from './text-factory.js';
+export type { TextSearchOptions, TextIngestionOptions, ContentSystemConfig } from './text-factory.js';
 export { TextSearchFactory as SearchFactory } from './text-factory.js';
 export { TextIngestionFactory as IngestionFactory } from './text-factory.js';
 export { TextRAGFactory as RAGFactory } from './text-factory.js';

package/dist/factories/text-factory.d.ts CHANGED

@@ -5,10 +5,21 @@
  * FACTORY PATTERN BENEFITS:
  * - Abstracts complex initialization (model loading, database setup, index initialization)
  * - Provides simple API for common use cases while preserving access to dependency injection
- * - Handles error recovery and validation
+ * - Clear validation and error handling without fallback mechanisms
  * - Supports different embedding models and configurations
  * - Enables clean separation between simple usage and advanced customization
  *
+ * MODE SELECTION GUIDE:
+ * - Text Mode (default): Optimized for text-only content
+ *   - Uses sentence-transformer models (fast, accurate for text)
+ *   - Images converted to text descriptions
+ *   - Best for: document search, text clustering, semantic similarity
+ *
+ * - Multimodal Mode: Optimized for mixed text/image content
+ *   - Uses CLIP models (unified embedding space)
+ *   - True cross-modal search (text finds images, images find text)
+ *   - Best for: image search, visual QA, multimodal retrieval
+ *
  * USAGE PATTERNS:
  *
  * 1. Simple Search Setup:
@@ -43,15 +54,31 @@
  * const results = await searchEngine.search('query');
  * ```
  *
- * 4. Error Recovery:
+ * 4. Clear Error Handling:
  * ```typescript
- * // Create with automatic fallback options
- * const search = await TextFactoryHelpers.createSearchWithFallback(
+ * // Create with clear validation and error reporting
+ * const search = await TextFactoryHelpers.createSearchWithValidation(
  *   './index.bin',
  *   './db.sqlite',
- *   { enableReranking: true } // Will fallback to disabled if reranking fails
+ *   { enableReranking: true } // Clear errors if issues occur
  * );
  * ```
+ *
+ * 5. Mode Selection:
+ * ```typescript
+ * // Text mode (default) - optimized for text-only content
+ * const textIngestion = await TextIngestionFactory.create('./db.sqlite', './index.bin', {
+ *   mode: 'text',
+ *   embeddingModel: 'sentence-transformers/all-MiniLM-L6-v2'
+ * });
+ *
+ * // Multimodal mode - enables cross-modal search
+ * const multimodalIngestion = await TextIngestionFactory.create('./db.sqlite', './index.bin', {
+ *   mode: 'multimodal',
+ *   embeddingModel: 'Xenova/clip-vit-base-patch32',
+ *   rerankingStrategy: 'text-derived'
+ * });
+ * ```
  */
 import { SearchEngine } from '../core/search.js';
 import { IngestionPipeline } from '../core/ingestion.js';
@@ -70,6 +97,21 @@ export interface TextSearchOptions {
     /** Top-k results to return (default: from config) */
     topK?: number;
 }
+/**
+ * Content system configuration options
+ */
+export interface ContentSystemConfig {
+    /** Content directory path (default: '.raglite/content') */
+    contentDir?: string;
+    /** Maximum file size in bytes (default: 50MB) */
+    maxFileSize?: number;
+    /** Maximum content directory size in bytes (default: 2GB) */
+    maxContentDirSize?: number;
+    /** Enable content deduplication (default: true) */
+    enableDeduplication?: boolean;
+    /** Enable storage tracking (default: true) */
+    enableStorageTracking?: boolean;
+}
 /**
  * Options for text ingestion factory
  */
@@ -84,17 +126,30 @@ export interface TextIngestionOptions {
     chunkOverlap?: number;
     /** Whether to force rebuild the index */
     forceRebuild?: boolean;
+    /** Mode for the ingestion pipeline: 'text' or 'multimodal' (default: 'text') */
+    mode?: 'text' | 'multimodal';
+    /** Reranking strategy for multimodal mode */
+    rerankingStrategy?: 'cross-encoder' | 'text-derived' | 'metadata' | 'hybrid' | 'disabled';
+    /** Content system configuration */
+    contentSystemConfig?: ContentSystemConfig;
 }
 /**
  * Factory for creating text-based SearchEngine instances
  * Handles model loading, database initialization, and index setup
  *
  * This factory abstracts the complex initialization process required for text search:
- * 1. Loads and validates text embedding models
- * 2. Optionally loads reranking models with fallback handling
- * 3. Establishes database connections and initializes schema
- * 4. Loads vector indexes with proper model compatibility checking
- * 5. Creates SearchEngine with proper dependency injection
+ * 1. Auto-detects embedding model from database configuration
+ * 2. Validates mode-model compatibility (no fallback mechanisms)
+ * 3. Loads embedding models with clear error reporting
+ * 4. Optionally loads reranking models based on configuration
+ * 5. Establishes database connections and initializes schema
+ * 6. Loads vector indexes with proper model compatibility checking
+ * 7. Creates SearchEngine with proper dependency injection
+ *
+ * Mode Support:
+ * - Automatically detects mode from database (text or multimodal)
+ * - Each mode uses its optimal implementation without fallbacks
+ * - Clear validation ensures mode-model compatibility
  *
  * @example
  * ```typescript
@@ -122,7 +177,7 @@ export declare class TextSearchFactory {
      * This method handles the complete initialization process:
      * - Validates that required files exist
      * - Loads text embedding model (with lazy initialization)
-     * - Optionally loads reranking model (with graceful fallback)
+     * - Optionally loads reranking model (with clear error reporting)
      * - Opens database connection and initializes schema
      * - Loads vector index with compatibility validation
      * - Creates SearchEngine with dependency injection
@@ -187,10 +242,18 @@ export declare class TextSearchFactory {
  *
  * This factory abstracts the complex initialization process required for text ingestion:
  * 1. Creates necessary directories if they don't exist
- * 2. Loads and validates text embedding models
- * 3. Establishes database connections and initializes schema
- * 4. Creates or loads vector indexes with proper configuration
- * 5. Creates IngestionPipeline with proper dependency injection
+ * 2. Validates mode-model compatibility (no fallback mechanisms)
+ * 3. Loads and validates embedding models with clear error reporting
+ * 4. Establishes database connections and initializes schema
+ * 5. Stores mode configuration in database for automatic detection
+ * 6. Creates or loads vector indexes with proper configuration
+ * 7. Creates IngestionPipeline with proper dependency injection
+ *
+ * Mode Configuration:
+ * - Text Mode (default): Uses sentence-transformer models for text-only content
+ * - Multimodal Mode: Uses CLIP models for mixed text/image content
+ * - Mode is stored in database and auto-detected during search
+ * - Clear validation prevents mode-model mismatches
  *
  * @example
  * ```typescript
@@ -232,20 +295,39 @@ export declare class TextIngestionFactory {
      * @param options.chunkSize - Override chunk size (default: from config)
      * @param options.chunkOverlap - Override chunk overlap (default: from config)
      * @param options.forceRebuild - Force rebuild of existing index (default: false)
+     * @param options.contentSystemConfig - Content system configuration options
+     * @param options.contentSystemConfig.contentDir - Content directory path (default: '.raglite/content')
+     * @param options.contentSystemConfig.maxFileSize - Maximum file size in bytes (default: 50MB)
+     * @param options.contentSystemConfig.maxContentDirSize - Maximum content directory size in bytes (default: 2GB)
+     * @param options.contentSystemConfig.enableDeduplication - Enable content deduplication (default: true)
+     * @param options.contentSystemConfig.enableStorageTracking - Enable storage tracking (default: true)
      * @returns Promise resolving to configured IngestionPipeline
      * @throws {Error} If initialization fails
      *
      * @example
      * ```typescript
-     * // Create ingestion pipeline
+     * // Create ingestion pipeline with default content system
      * const ingestion = await TextIngestionFactory.create('./my-db.sqlite', './my-index.bin');
      *
+     * // Create with custom content system configuration
+     * const customIngestion = await TextIngestionFactory.create('./my-db.sqlite', './my-index.bin', {
+     *   contentSystemConfig: {
+     *     contentDir: './custom-content',
+     *     maxFileSize: 100 * 1024 * 1024, // 100MB
+     *     maxContentDirSize: 5 * 1024 * 1024 * 1024, // 5GB
+     *     enableDeduplication: true
+     *   }
+     * });
+     *
      * // Ingest documents from directory
      * const result = await ingestion.ingestDirectory('./documents');
      * console.log(`Processed ${result.documentsProcessed} documents`);
      *
-     * // Ingest single file
-     * await ingestion.ingestFile('./document.pdf');
+     * // Ingest content from memory (MCP integration)
+     * const contentId = await ingestion.ingestFromMemory(buffer, {
+     *   displayName: 'uploaded-file.pdf',
+     *   contentType: 'application/pdf'
+     * });
      *
      * // Clean up when done
      * await ingestion.cleanup();
@@ -259,6 +341,22 @@ export declare class TextIngestionFactory {
      * @returns Promise resolving to configured IngestionPipeline
      */
     static createWithDefaults(options?: TextIngestionOptions): Promise<IngestionPipeline>;
+    /**
+     * Handles mode storage during ingestion
+     * Creates or validates system info based on the provided mode and options
+     * @private
+     */
+    private static handleModeStorage;
+    /**
+     * Updates system info in the database
+     * @private
+     */
+    private static updateSystemInfo;
+    /**
+     * Validates and prepares content system configuration
+     * @private
+     */
+    private static validateAndPrepareContentSystemConfig;
 }
 /**
  * Convenience factory to create both search and ingestion instances
@@ -365,9 +463,9 @@ export declare class TextRAGFactory {
  * const { searchOptions, ingestionOptions } = TextFactoryHelpers.getRecommendedConfig('quality');
  * const search = await TextSearchFactory.create('./index.bin', './db.sqlite', searchOptions);
  *
- * // Create with automatic error recovery
- * const search = await TextFactoryHelpers.createSearchWithFallback('./index.bin', './db.sqlite', {
- *   enableReranking: true // Will fallback to disabled if reranking fails
+ * // Create with clear validation and error reporting
+ * const search = await TextFactoryHelpers.createSearchWithValidation('./index.bin', './db.sqlite', {
+ *   enableReranking: true // Will fail clearly if reranking has issues
  * });
  * ```
  */
@@ -433,34 +531,30 @@ export declare class TextFactoryHelpers {
         ingestionOptions: TextIngestionOptions;
     };
     /**
-     * Create a search engine with automatic error recovery
+     * Create a search engine with clear error reporting
      *
-     * This method attempts to create a search engine with the provided options,
-     * and if that fails, it tries again with fallback options (primarily
-     * disabling reranking, which is a common source of initialization failures).
-     * This provides a more robust way to create search engines in environments
-     * where reranking models might not be available or might fail to load.
+     * This method creates a search engine with the provided options and fails
+     * clearly if there are any issues, providing actionable error messages.
      *
      * @param indexPath - Path to vector index file
      * @param dbPath - Path to database file
-     * @param options - Initial options to try
-     * @returns Promise resolving to SearchEngine (possibly with fallback options)
-     * @throws {Error} If both original and fallback creation attempts fail
+     * @param options - Configuration options
+     * @returns Promise resolving to SearchEngine
+     * @throws {Error} If creation fails; the error carries a clear, actionable message
      *
      * @example
      * ```typescript
-     * // Try to create with reranking, fallback to without if it fails
-     * const search = await TextFactoryHelpers.createSearchWithFallback(
+     * // Create search engine with clear error handling
+     * const search = await TextFactoryHelpers.createSearchWithValidation(
      *   './index.bin',
      *   './db.sqlite',
      *   { enableReranking: true, topK: 20 }
      * );
      *
-     * // The search engine will work even if reranking model fails to load
      * const results = await search.search('query');
      * console.log(`Search created successfully with ${results.length} results`);
      * ```
      */
-    static createSearchWithFallback(indexPath: string, dbPath: string, options?: TextSearchOptions): Promise<SearchEngine>;
+    static createSearchWithValidation(indexPath: string, dbPath: string, options?: TextSearchOptions): Promise<SearchEngine>;
 }
 //# sourceMappingURL=text-factory.d.ts.map