rag-lite-ts 2.3.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/cli/search.js +77 -2
- package/dist/cjs/cli.js +28 -1
- package/dist/cjs/core/abstract-generator.d.ts +97 -0
- package/dist/cjs/core/abstract-generator.js +222 -0
- package/dist/cjs/core/binary-index-format.js +47 -7
- package/dist/cjs/core/generator-registry.d.ts +114 -0
- package/dist/cjs/core/generator-registry.js +280 -0
- package/dist/cjs/core/index.d.ts +4 -0
- package/dist/cjs/core/index.js +11 -0
- package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/cjs/core/lazy-dependency-loader.js +111 -2
- package/dist/cjs/core/prompt-templates.d.ts +138 -0
- package/dist/cjs/core/prompt-templates.js +225 -0
- package/dist/cjs/core/response-generator.d.ts +132 -0
- package/dist/cjs/core/response-generator.js +69 -0
- package/dist/cjs/core/search.d.ts +72 -1
- package/dist/cjs/core/search.js +79 -6
- package/dist/cjs/core/types.d.ts +1 -0
- package/dist/cjs/core/vector-index-worker.js +10 -0
- package/dist/cjs/core/vector-index.js +69 -19
- package/dist/cjs/factories/generator-factory.d.ts +88 -0
- package/dist/cjs/factories/generator-factory.js +151 -0
- package/dist/cjs/factories/index.d.ts +1 -0
- package/dist/cjs/factories/index.js +5 -0
- package/dist/cjs/index.d.ts +9 -0
- package/dist/cjs/index.js +16 -0
- package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
- package/dist/cjs/text/generators/index.d.ts +10 -0
- package/dist/cjs/text/generators/index.js +10 -0
- package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
- package/dist/cjs/text/generators/instruct-generator.js +192 -0
- package/dist/esm/cli/search.js +77 -2
- package/dist/esm/cli.js +28 -1
- package/dist/esm/core/abstract-generator.d.ts +97 -0
- package/dist/esm/core/abstract-generator.js +222 -0
- package/dist/esm/core/binary-index-format.js +47 -7
- package/dist/esm/core/generator-registry.d.ts +114 -0
- package/dist/esm/core/generator-registry.js +280 -0
- package/dist/esm/core/index.d.ts +4 -0
- package/dist/esm/core/index.js +11 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/esm/core/lazy-dependency-loader.js +111 -2
- package/dist/esm/core/prompt-templates.d.ts +138 -0
- package/dist/esm/core/prompt-templates.js +225 -0
- package/dist/esm/core/response-generator.d.ts +132 -0
- package/dist/esm/core/response-generator.js +69 -0
- package/dist/esm/core/search.d.ts +72 -1
- package/dist/esm/core/search.js +79 -6
- package/dist/esm/core/types.d.ts +1 -0
- package/dist/esm/core/vector-index-worker.js +10 -0
- package/dist/esm/core/vector-index.js +69 -19
- package/dist/esm/factories/generator-factory.d.ts +88 -0
- package/dist/esm/factories/generator-factory.js +151 -0
- package/dist/esm/factories/index.d.ts +1 -0
- package/dist/esm/factories/index.js +5 -0
- package/dist/esm/index.d.ts +9 -0
- package/dist/esm/index.js +16 -0
- package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/esm/text/generators/causal-lm-generator.js +197 -0
- package/dist/esm/text/generators/index.d.ts +10 -0
- package/dist/esm/text/generators/index.js +10 -0
- package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
- package/dist/esm/text/generators/instruct-generator.js +192 -0
- package/package.json +1 -1
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Generator Registry for RAG Response Generation
|
|
3
|
+
*
|
|
4
|
+
* Centralized registry of supported generator models with validation and
|
|
5
|
+
* compatibility checking. Follows the same patterns as model-registry.ts.
|
|
6
|
+
*
|
|
7
|
+
* SUPPORTED MODELS:
|
|
8
|
+
* - HuggingFaceTB/SmolLM2-135M-Instruct: Balanced instruct model (DEFAULT, 3 chunks)
|
|
9
|
+
* - HuggingFaceTB/SmolLM2-360M-Instruct: Higher quality instruct model (5 chunks)
|
|
10
|
+
*
|
|
11
|
+
* PREREQUISITES:
|
|
12
|
+
* - Reranking must be enabled for response generation to ensure quality context
|
|
13
|
+
*
|
|
14
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
15
|
+
*/
|
|
16
|
+
// =============================================================================
|
|
17
|
+
// GENERATOR REGISTRY
|
|
18
|
+
// =============================================================================
|
|
19
|
+
/**
 * Registry of supported generator models with their metadata and capabilities.
 * Keys are Hugging Face model identifiers; values describe the model type,
 * generation capabilities, and runtime requirements that GeneratorRegistry
 * uses for validation and compatibility checks.
 */
export const SUPPORTED_GENERATORS = {
    // SmolLM2-135M-Instruct - Balanced instruction-tuned model (RECOMMENDED DEFAULT)
    'HuggingFaceTB/SmolLM2-135M-Instruct': {
        name: 'HuggingFaceTB/SmolLM2-135M-Instruct',
        type: 'instruct',
        version: '1.0.0',
        description: 'Balanced instruction-tuned model with good quality and speed (uses top 3 chunks)',
        capabilities: {
            supportsStreaming: true,
            supportsSystemPrompt: true, // Instruct models support system prompts
            instructionTuned: true,
            maxContextLength: 2048, // tokens
            defaultMaxOutputTokens: 512,
            recommendedTemperature: 0.1, // low temperature keeps answers grounded in context
            defaultMaxChunksForContext: 3 // Use top 3 reranked chunks for context
        },
        requirements: {
            transformersJsVersion: '>=3.0.0',
            minimumMemory: 768, // MB
            requiredFeatures: ['text-generation'],
            platformSupport: ['node', 'browser']
        },
        isDefault: true // Recommended default model
    },
    // SmolLM2-360M-Instruct - Higher quality instruction-tuned model
    'HuggingFaceTB/SmolLM2-360M-Instruct': {
        name: 'HuggingFaceTB/SmolLM2-360M-Instruct',
        type: 'instruct',
        version: '1.0.0',
        description: 'Higher quality instruction-tuned model, slower but more accurate (uses top 5 chunks)',
        capabilities: {
            supportsStreaming: true,
            supportsSystemPrompt: true,
            instructionTuned: true,
            maxContextLength: 2048, // tokens
            defaultMaxOutputTokens: 512,
            recommendedTemperature: 0.1,
            defaultMaxChunksForContext: 5 // Use top 5 reranked chunks for context
        },
        requirements: {
            transformersJsVersion: '>=3.0.0',
            minimumMemory: 1024, // MB — larger model needs more headroom
            requiredFeatures: ['text-generation'],
            platformSupport: ['node', 'browser']
        },
        isDefault: false
    }
};
|
|
70
|
+
// =============================================================================
// DEFAULT MODEL
// =============================================================================
/** Default generator model name (must be a key of SUPPORTED_GENERATORS). */
export const DEFAULT_GENERATOR_MODEL = 'HuggingFaceTB/SmolLM2-135M-Instruct';
|
|
75
|
+
// =============================================================================
|
|
76
|
+
// GENERATOR REGISTRY CLASS
|
|
77
|
+
// =============================================================================
|
|
78
|
+
/**
 * Generator registry providing validation and model-information services.
 * All members are static; the registry is backed by the module-level
 * SUPPORTED_GENERATORS table.
 */
export class GeneratorRegistry {
    /**
     * Looks up the registry entry for a generator model.
     * @param modelName - Name of the generator model
     * @returns Generator model information, or null when the model is unknown
     */
    static getGeneratorInfo(modelName) {
        const entry = SUPPORTED_GENERATORS[modelName];
        return entry ? entry : null;
    }
    /**
     * Validates a generator model name and reports compatibility information.
     * @param modelName - Name of the model to validate
     * @returns Validation result with errors, warnings, and suggestions
     */
    static validateGenerator(modelName) {
        const info = this.getGeneratorInfo(modelName);
        if (!info) {
            // Unknown model: offer close matches, or the full list as a fallback.
            const similar = this.getSimilarGenerators(modelName);
            const hints = similar.length > 0
                ? [`Did you mean: ${similar.join(', ')}?`]
                : [`Available generators: ${this.getSupportedGenerators().join(', ')}`];
            return {
                isValid: false,
                errors: [`Generator model '${modelName}' is not supported`],
                warnings: [],
                suggestions: hints
            };
        }
        const warnings = [];
        // Flag models that need more memory than the lightest supported model.
        if (info.requirements.minimumMemory > 768) {
            warnings.push(`Model requires ${info.requirements.minimumMemory}MB memory`);
        }
        return { isValid: true, errors: [], warnings, suggestions: [] };
    }
    /**
     * Lists all supported generator models.
     * @param modelType - Optional filter by model type
     * @returns Array of supported generator model names
     */
    static getSupportedGenerators(modelType) {
        const names = Object.keys(SUPPORTED_GENERATORS);
        if (!modelType) {
            return names;
        }
        return names.filter(name => SUPPORTED_GENERATORS[name].type === modelType);
    }
    /**
     * Returns the default generator model name.
     */
    static getDefaultGenerator() {
        return DEFAULT_GENERATOR_MODEL;
    }
    /**
     * Lists generators of a specific type.
     * @param type - Generator type ('causal-lm' or 'instruct')
     * @returns Array of model names matching the type
     */
    static getGeneratorsByType(type) {
        const matches = [];
        for (const name of Object.keys(SUPPORTED_GENERATORS)) {
            if (SUPPORTED_GENERATORS[name].type === type) {
                matches.push(name);
            }
        }
        return matches;
    }
    /**
     * Checks whether a generator model supports a specific capability.
     * Boolean capabilities return their value; non-boolean capabilities are
     * considered "supported" when the key is present (not undefined).
     * @param modelName - Name of the model
     * @param capability - Capability to check
     * @returns True if the model supports the capability
     */
    static supportsCapability(modelName, capability) {
        const info = this.getGeneratorInfo(modelName);
        if (!info) {
            return false;
        }
        const value = info.capabilities[capability];
        if (typeof value === 'boolean') {
            return value;
        }
        return value !== undefined;
    }
    /**
     * Finds up to three registered generators that share a keyword with the
     * given (unsupported) model name, for "did you mean" suggestions.
     * @private
     */
    static getSimilarGenerators(modelName) {
        const needle = modelName.toLowerCase();
        // Keyword overlap is a crude-but-cheap similarity heuristic.
        const keywords = ['gpt', 'smol', 'lm', 'instruct', 'distil'];
        const active = keywords.filter(keyword => needle.includes(keyword));
        const matches = [];
        for (const candidate of Object.keys(SUPPORTED_GENERATORS)) {
            const lower = candidate.toLowerCase();
            if (active.some(keyword => lower.includes(keyword))) {
                matches.push(candidate);
                if (matches.length === 3) {
                    break; // cap suggestions at three
                }
            }
        }
        return matches;
    }
    /**
     * Validates system compatibility (memory, platform) for a generator model.
     * @param modelName - Name of the model
     * @param systemCapabilities - System capabilities to check against
     * @returns Validation result with compatibility information
     */
    static validateSystemCompatibility(modelName, systemCapabilities) {
        const info = this.getGeneratorInfo(modelName);
        if (!info) {
            return {
                isValid: false,
                errors: [`Generator model '${modelName}' is not supported`],
                warnings: [],
                suggestions: []
            };
        }
        const errors = [];
        const warnings = [];
        const suggestions = [];
        const { availableMemory, platform } = systemCapabilities;
        // Memory check — only applies when the caller reported available memory.
        if (availableMemory !== undefined && availableMemory < info.requirements.minimumMemory) {
            errors.push(`Insufficient memory: ${availableMemory}MB available, ${info.requirements.minimumMemory}MB required`);
            // Suggest any registered models that fit within available memory.
            const lighter = this.getSupportedGenerators().filter(name => {
                const candidate = this.getGeneratorInfo(name);
                return candidate &&
                    candidate.requirements.minimumMemory <= availableMemory;
            });
            if (lighter.length > 0) {
                suggestions.push(`Consider lighter models: ${lighter.join(', ')}`);
            }
        }
        // Platform check — only applies when the caller reported a platform.
        if (platform && !info.requirements.platformSupport.includes(platform)) {
            errors.push(`Platform '${platform}' not supported. Supported platforms: ${info.requirements.platformSupport.join(', ')}`);
        }
        return {
            isValid: errors.length === 0,
            errors,
            warnings,
            suggestions
        };
    }
}
|
|
226
|
+
// =============================================================================
|
|
227
|
+
// UTILITY FUNCTIONS
|
|
228
|
+
// =============================================================================
|
|
229
|
+
/**
 * Resolves the generator type for a model name.
 * @param modelName - Name of the model
 * @returns Generator type, or null when the model is not in the registry
 */
export function getGeneratorType(modelName) {
    const info = GeneratorRegistry.getGeneratorInfo(modelName);
    if (info === null) {
        return null;
    }
    return info.type;
}
|
|
238
|
+
/**
 * Reports whether a model is instruction-tuned.
 * @param modelName - Name of the model
 * @returns True if the model is instruction-tuned; false for unknown models
 */
export function isInstructionTunedModel(modelName) {
    const info = GeneratorRegistry.getGeneratorInfo(modelName);
    if (info === null) {
        return false;
    }
    return info.capabilities.instructionTuned;
}
|
|
247
|
+
/**
 * Reads the maximum context length (in tokens) for a generator model.
 * @param modelName - Name of the model
 * @returns Maximum context length, or null when the model is not supported
 */
export function getMaxContextLength(modelName) {
    const info = GeneratorRegistry.getGeneratorInfo(modelName);
    if (info === null) {
        return null;
    }
    return info.capabilities.maxContextLength;
}
|
|
256
|
+
/**
 * Builds the recommended generation settings for a model from its registry
 * entry (temperature, output-token budget, context chunk count).
 * @param modelName - Name of the model
 * @returns Recommended settings, or null when the model is not supported
 */
export function getRecommendedSettings(modelName) {
    const info = GeneratorRegistry.getGeneratorInfo(modelName);
    if (info === null) {
        return null;
    }
    const { recommendedTemperature, defaultMaxOutputTokens, defaultMaxChunksForContext } = info.capabilities;
    return {
        temperature: recommendedTemperature,
        maxTokens: defaultMaxOutputTokens,
        maxChunksForContext: defaultMaxChunksForContext
    };
}
|
|
271
|
+
/**
 * Reads the default maximum number of context chunks for a generator model.
 * @param modelName - Name of the model
 * @returns Default max chunks for context, or null when the model is not supported
 */
export function getDefaultMaxChunksForContext(modelName) {
    const info = GeneratorRegistry.getGeneratorInfo(modelName);
    if (info === null) {
        return null;
    }
    return info.capabilities.defaultMaxChunksForContext;
}
|
|
280
|
+
//# sourceMappingURL=generator-registry.js.map
|
package/dist/esm/core/index.d.ts
CHANGED
|
@@ -56,4 +56,8 @@ export * from './path-manager.js';
|
|
|
56
56
|
export { ContentManager, type MemoryContentMetadata, type ContentIngestionResult, type ContentManagerConfig } from './content-manager.js';
|
|
57
57
|
export { ContentResolver, type ContentRequest, type ContentResult } from './content-resolver.js';
|
|
58
58
|
export * from './error-handler.js';
|
|
59
|
+
export { type ResponseGenerator, type GeneratorModelType, type GenerationRequest, type GenerationResult, type GeneratorCapabilities, type GeneratorRequirements, type GeneratorModelInfo, type GeneratorValidationResult, type GeneratorCreationOptions, type GenerateFunction, type CreateGeneratorFunction, GeneratorValidationError, GenerationError, ContextWindowError, supportsStreaming, isInstructModel, createGenerateFunction } from './response-generator.js';
|
|
60
|
+
export { SUPPORTED_GENERATORS, DEFAULT_GENERATOR_MODEL, GeneratorRegistry, getGeneratorType, isInstructionTunedModel, getMaxContextLength, getRecommendedSettings, getDefaultMaxChunksForContext } from './generator-registry.js';
|
|
61
|
+
export { DEFAULT_SYSTEM_PROMPT, DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION, SMOLLM2_CHAT_TEMPLATE, formatContextChunks, buildPrompt, estimateTokenCount, calculateContextBudget, getDefaultStopSequences, type ContextFormattingOptions, type FormattedContext, type PromptBuildOptions, type BuiltPrompt } from './prompt-templates.js';
|
|
62
|
+
export { BaseResponseGenerator, createGeneratorOptions, type GeneratorOptions } from './abstract-generator.js';
|
|
59
63
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/esm/core/index.js
CHANGED
|
@@ -66,4 +66,15 @@ export { ContentManager } from './content-manager.js';
|
|
|
66
66
|
export { ContentResolver } from './content-resolver.js';
|
|
67
67
|
// Error handling framework - supports implementation-specific error contexts
|
|
68
68
|
export * from './error-handler.js';
|
|
69
|
+
// =============================================================================
|
|
70
|
+
// EXPERIMENTAL: Response Generation
|
|
71
|
+
// =============================================================================
|
|
72
|
+
// Response generation types and interfaces (experimental)
|
|
73
|
+
export { GeneratorValidationError, GenerationError, ContextWindowError, supportsStreaming, isInstructModel, createGenerateFunction } from './response-generator.js';
|
|
74
|
+
// Generator registry (experimental)
|
|
75
|
+
export { SUPPORTED_GENERATORS, DEFAULT_GENERATOR_MODEL, GeneratorRegistry, getGeneratorType, isInstructionTunedModel, getMaxContextLength, getRecommendedSettings, getDefaultMaxChunksForContext } from './generator-registry.js';
|
|
76
|
+
// Prompt templates for generation (experimental)
|
|
77
|
+
export { DEFAULT_SYSTEM_PROMPT, DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION, SMOLLM2_CHAT_TEMPLATE, formatContextChunks, buildPrompt, estimateTokenCount, calculateContextBudget, getDefaultStopSequences } from './prompt-templates.js';
|
|
78
|
+
// Abstract base generator (experimental)
|
|
79
|
+
export { BaseResponseGenerator, createGeneratorOptions } from './abstract-generator.js';
|
|
69
80
|
//# sourceMappingURL=index.js.map
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import '../dom-polyfills.js';
|
|
9
9
|
import type { UniversalEmbedder } from './universal-embedder.js';
|
|
10
10
|
import type { RerankFunction } from './interfaces.js';
|
|
11
|
+
import type { ResponseGenerator } from './response-generator.js';
|
|
11
12
|
/**
|
|
12
13
|
* Lazy loader for embedder implementations
|
|
13
14
|
* Only loads the specific embedder type when needed
|
|
@@ -42,6 +43,42 @@ export declare class LazyEmbedderLoader {
|
|
|
42
43
|
multimodalEmbedders: number;
|
|
43
44
|
};
|
|
44
45
|
}
|
|
46
|
+
/**
|
|
47
|
+
* Lazy loader for response generator implementations
|
|
48
|
+
* Only loads the specific generator type when needed
|
|
49
|
+
*
|
|
50
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
51
|
+
*/
|
|
52
|
+
export declare class LazyGeneratorLoader {
|
|
53
|
+
private static cache;
|
|
54
|
+
/**
|
|
55
|
+
* Lazily load and create an instruct generator (SmolLM2-Instruct)
|
|
56
|
+
* Only imports the module when generation is actually requested
|
|
57
|
+
*/
|
|
58
|
+
static loadInstructGenerator(modelName: string, options?: any): Promise<ResponseGenerator>;
|
|
59
|
+
/**
|
|
60
|
+
* Lazily load and create a causal LM generator (DistilGPT2)
|
|
61
|
+
* Only imports the module when generation is actually requested
|
|
62
|
+
*/
|
|
63
|
+
static loadCausalLMGenerator(modelName: string, options?: any): Promise<ResponseGenerator>;
|
|
64
|
+
/**
|
|
65
|
+
* Check if a generator is already loaded in cache
|
|
66
|
+
*/
|
|
67
|
+
static isGeneratorLoaded(modelName: string, modelType: 'instruct' | 'causal-lm'): boolean;
|
|
68
|
+
/**
|
|
69
|
+
* Remove a generator from the cache (called when generator is cleaned up)
|
|
70
|
+
*/
|
|
71
|
+
static removeGeneratorFromCache(modelName: string, modelType: 'instruct' | 'causal-lm'): void;
|
|
72
|
+
/**
|
|
73
|
+
* Get statistics about loaded generators
|
|
74
|
+
*/
|
|
75
|
+
static getLoadingStats(): {
|
|
76
|
+
loadedGenerators: string[];
|
|
77
|
+
totalLoaded: number;
|
|
78
|
+
instructGenerators: number;
|
|
79
|
+
causalLMGenerators: number;
|
|
80
|
+
};
|
|
81
|
+
}
|
|
45
82
|
/**
|
|
46
83
|
* Lazy loader for reranking implementations
|
|
47
84
|
* Only loads the specific reranker type when needed
|
|
@@ -107,6 +144,11 @@ export declare class LazyMultimodalLoader {
|
|
|
107
144
|
* Provides a single entry point for dependency management
|
|
108
145
|
*/
|
|
109
146
|
export declare class LazyDependencyManager {
|
|
147
|
+
/**
|
|
148
|
+
* Load response generator based on model type with lazy loading
|
|
149
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
150
|
+
*/
|
|
151
|
+
static loadGenerator(modelName: string, modelType: 'instruct' | 'causal-lm', options?: any): Promise<ResponseGenerator>;
|
|
110
152
|
/**
|
|
111
153
|
* Load embedder based on model type with lazy loading
|
|
112
154
|
*/
|
|
@@ -121,6 +163,7 @@ export declare class LazyDependencyManager {
|
|
|
121
163
|
static getLoadingStatistics(): {
|
|
122
164
|
embedders: ReturnType<typeof LazyEmbedderLoader.getLoadingStats>;
|
|
123
165
|
rerankers: ReturnType<typeof LazyRerankerLoader.getLoadingStats>;
|
|
166
|
+
generators: ReturnType<typeof LazyGeneratorLoader.getLoadingStats>;
|
|
124
167
|
multimodal: ReturnType<typeof LazyMultimodalLoader.getMultimodalLoadingStatus>;
|
|
125
168
|
totalModulesLoaded: number;
|
|
126
169
|
memoryImpact: 'low' | 'medium' | 'high';
|
|
@@ -149,6 +149,99 @@ export class LazyEmbedderLoader {
|
|
|
149
149
|
}
|
|
150
150
|
}
|
|
151
151
|
// =============================================================================
|
|
152
|
+
// LAZY GENERATOR LOADING
|
|
153
|
+
// =============================================================================
|
|
154
|
+
/**
 * Lazy loader for response generator implementations.
 * Defers the dynamic import and model download of a generator until
 * generation is actually requested, caching loaded instances by
 * `generator:<type>:<model>` keys.
 *
 * @experimental This feature is experimental and may change in future versions.
 */
export class LazyGeneratorLoader {
    // Shared process-wide cache of loaded modules/instances.
    static cache = LazyLoadingCache.getInstance();
    /**
     * Lazily load and create an instruct generator (SmolLM2-Instruct).
     * The module is only imported on first use; later calls hit the cache.
     */
    static async loadInstructGenerator(modelName, options = {}) {
        const cacheKey = `generator:instruct:${modelName}`;
        return this.cache.getOrLoad(cacheKey, async () => {
            try {
                console.log(`🔄 [EXPERIMENTAL] Lazy loading instruct generator: ${modelName}`);
                // Dynamic import - only loaded when generation is requested
                const { InstructGenerator } = await import('../text/generators/instruct-generator.js');
                const instance = new InstructGenerator(modelName, options);
                await instance.loadModel();
                console.log(`✅ Instruct generator loaded: ${modelName}`);
                return instance;
            }
            catch (error) {
                const reason = error instanceof Error ? error.message : 'Unknown error';
                const enhancedError = createError.model(`Failed to lazy load instruct generator '${modelName}': ${reason}`);
                handleError(enhancedError, 'LazyGeneratorLoader', {
                    severity: ErrorSeverity.ERROR,
                    category: ErrorCategory.MODEL
                });
                throw enhancedError;
            }
        });
    }
    /**
     * Lazily load and create a causal LM generator (e.g. DistilGPT2).
     * The module is only imported on first use; later calls hit the cache.
     */
    static async loadCausalLMGenerator(modelName, options = {}) {
        const cacheKey = `generator:causal-lm:${modelName}`;
        return this.cache.getOrLoad(cacheKey, async () => {
            try {
                console.log(`🔄 [EXPERIMENTAL] Lazy loading causal LM generator: ${modelName}`);
                // Dynamic import - only loaded when generation is requested
                const { CausalLMGenerator } = await import('../text/generators/causal-lm-generator.js');
                const instance = new CausalLMGenerator(modelName, options);
                await instance.loadModel();
                console.log(`✅ Causal LM generator loaded: ${modelName}`);
                return instance;
            }
            catch (error) {
                const reason = error instanceof Error ? error.message : 'Unknown error';
                const enhancedError = createError.model(`Failed to lazy load causal LM generator '${modelName}': ${reason}`);
                handleError(enhancedError, 'LazyGeneratorLoader', {
                    severity: ErrorSeverity.ERROR,
                    category: ErrorCategory.MODEL
                });
                throw enhancedError;
            }
        });
    }
    /**
     * Check whether a generator is already present in the cache.
     */
    static isGeneratorLoaded(modelName, modelType) {
        return this.cache.getLoadedModules().includes(`generator:${modelType}:${modelName}`);
    }
    /**
     * Evict a generator from the cache (called when the generator is cleaned up).
     */
    static removeGeneratorFromCache(modelName, modelType) {
        const cacheKey = `generator:${modelType}:${modelName}`;
        this.cache.remove(cacheKey);
        console.log(`🧹 Removed generator from cache: ${cacheKey}`);
    }
    /**
     * Summarize which generators are currently loaded, broken down by type.
     */
    static getLoadingStats() {
        const generatorKeys = [];
        for (const key of this.cache.getLoadedModules()) {
            if (key.startsWith('generator:')) {
                generatorKeys.push(key);
            }
        }
        let instructGenerators = 0;
        let causalLMGenerators = 0;
        for (const key of generatorKeys) {
            if (key.includes(':instruct:')) {
                instructGenerators += 1;
            }
            if (key.includes(':causal-lm:')) {
                causalLMGenerators += 1;
            }
        }
        return {
            loadedGenerators: generatorKeys,
            totalLoaded: generatorKeys.length,
            instructGenerators,
            causalLMGenerators
        };
    }
}
|
|
244
|
+
// =============================================================================
|
|
152
245
|
// LAZY RERANKER LOADING
|
|
153
246
|
// =============================================================================
|
|
154
247
|
/**
|
|
@@ -332,6 +425,20 @@ export class LazyMultimodalLoader {
|
|
|
332
425
|
* Provides a single entry point for dependency management
|
|
333
426
|
*/
|
|
334
427
|
export class LazyDependencyManager {
|
|
428
|
+
/**
|
|
429
|
+
* Load response generator based on model type with lazy loading
|
|
430
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
431
|
+
*/
|
|
432
|
+
static async loadGenerator(modelName, modelType, options = {}) {
|
|
433
|
+
switch (modelType) {
|
|
434
|
+
case 'instruct':
|
|
435
|
+
return LazyGeneratorLoader.loadInstructGenerator(modelName, options);
|
|
436
|
+
case 'causal-lm':
|
|
437
|
+
return LazyGeneratorLoader.loadCausalLMGenerator(modelName, options);
|
|
438
|
+
default:
|
|
439
|
+
throw createError.validation(`Unsupported generator model type for lazy loading: ${modelType}`);
|
|
440
|
+
}
|
|
441
|
+
}
|
|
335
442
|
/**
|
|
336
443
|
* Load embedder based on model type with lazy loading
|
|
337
444
|
*/
|
|
@@ -367,19 +474,21 @@ export class LazyDependencyManager {
|
|
|
367
474
|
static getLoadingStatistics() {
|
|
368
475
|
const embedderStats = LazyEmbedderLoader.getLoadingStats();
|
|
369
476
|
const rerankerStats = LazyRerankerLoader.getLoadingStats();
|
|
477
|
+
const generatorStats = LazyGeneratorLoader.getLoadingStats();
|
|
370
478
|
const multimodalStats = LazyMultimodalLoader.getMultimodalLoadingStatus();
|
|
371
|
-
const totalModules = embedderStats.totalLoaded + rerankerStats.totalLoaded + multimodalStats.loadedProcessors.length;
|
|
479
|
+
const totalModules = embedderStats.totalLoaded + rerankerStats.totalLoaded + generatorStats.totalLoaded + multimodalStats.loadedProcessors.length;
|
|
372
480
|
// Estimate memory impact based on loaded modules
|
|
373
481
|
let memoryImpact = 'low';
|
|
374
482
|
if (embedderStats.multimodalEmbedders > 0 || multimodalStats.imageToTextLoaded) {
|
|
375
483
|
memoryImpact = 'high';
|
|
376
484
|
}
|
|
377
|
-
else if (totalModules > 2) {
|
|
485
|
+
else if (totalModules > 2 || generatorStats.totalLoaded > 0) {
|
|
378
486
|
memoryImpact = 'medium';
|
|
379
487
|
}
|
|
380
488
|
return {
|
|
381
489
|
embedders: embedderStats,
|
|
382
490
|
rerankers: rerankerStats,
|
|
491
|
+
generators: generatorStats,
|
|
383
492
|
multimodal: multimodalStats,
|
|
384
493
|
totalModulesLoaded: totalModules,
|
|
385
494
|
memoryImpact
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Prompt Templates for RAG Response Generation
|
|
3
|
+
*
|
|
4
|
+
* Provides prompt engineering utilities for different generator model types.
|
|
5
|
+
* Handles context formatting, token budget management, and system prompts.
|
|
6
|
+
*
|
|
7
|
+
* PROMPT STRATEGIES:
|
|
8
|
+
* - Instruct models: Use chat template with system/user/assistant roles
|
|
9
|
+
* - Causal LM models: Use simple document + question format
|
|
10
|
+
*
|
|
11
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
12
|
+
*/
|
|
13
|
+
import type { SearchResult } from './types.js';
|
|
14
|
+
import type { GeneratorModelType } from './response-generator.js';
|
|
15
|
+
/**
|
|
16
|
+
* Default system prompt for instruct models
|
|
17
|
+
* Emphasizes grounded responses using only provided context
|
|
18
|
+
*/
|
|
19
|
+
export declare const DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n3. Do not make up information or use external knowledge\n4. Be concise and direct in your response\n5. If the context is incomplete or unclear, acknowledge this limitation";
|
|
20
|
+
/**
|
|
21
|
+
* Default system prompt for RAG with source attribution
|
|
22
|
+
*/
|
|
23
|
+
export declare const DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. When possible, mention which document the information comes from\n3. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n4. Do not make up information or use external knowledge\n5. Be concise and direct in your response";
|
|
24
|
+
/**
|
|
25
|
+
* SmolLM2 chat template format
|
|
26
|
+
* Uses <|im_start|> and <|im_end|> tokens
|
|
27
|
+
*/
|
|
28
|
+
export declare const SMOLLM2_CHAT_TEMPLATE: {
|
|
29
|
+
systemStart: string;
|
|
30
|
+
systemEnd: string;
|
|
31
|
+
userStart: string;
|
|
32
|
+
userEnd: string;
|
|
33
|
+
assistantStart: string;
|
|
34
|
+
assistantEnd: string;
|
|
35
|
+
endOfText: string;
|
|
36
|
+
};
|
|
37
|
+
/**
|
|
38
|
+
* Options for formatting context chunks
|
|
39
|
+
*/
|
|
40
|
+
export interface ContextFormattingOptions {
|
|
41
|
+
/** Maximum tokens available for context */
|
|
42
|
+
maxContextTokens: number;
|
|
43
|
+
/** Include document titles/sources */
|
|
44
|
+
includeDocumentInfo?: boolean;
|
|
45
|
+
/** Include relevance scores */
|
|
46
|
+
includeScores?: boolean;
|
|
47
|
+
/** Separator between chunks */
|
|
48
|
+
chunkSeparator?: string;
|
|
49
|
+
/** Token estimation function (chars to tokens ratio) */
|
|
50
|
+
tokenEstimationRatio?: number;
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Result of context formatting
|
|
54
|
+
*/
|
|
55
|
+
export interface FormattedContext {
|
|
56
|
+
/** Formatted context string */
|
|
57
|
+
text: string;
|
|
58
|
+
/** Estimated token count */
|
|
59
|
+
estimatedTokens: number;
|
|
60
|
+
/** Number of chunks included */
|
|
61
|
+
chunksIncluded: number;
|
|
62
|
+
/** Total chunks available */
|
|
63
|
+
totalChunks: number;
|
|
64
|
+
/** Whether context was truncated */
|
|
65
|
+
truncated: boolean;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Format search result chunks into context string for the prompt
|
|
69
|
+
*
|
|
70
|
+
* @param chunks - Search result chunks to format
|
|
71
|
+
* @param options - Formatting options
|
|
72
|
+
* @returns Formatted context with metadata
|
|
73
|
+
*/
|
|
74
|
+
export declare function formatContextChunks(chunks: SearchResult[], options: ContextFormattingOptions): FormattedContext;
|
|
75
|
+
/**
|
|
76
|
+
* Options for building the complete prompt
|
|
77
|
+
*/
|
|
78
|
+
export interface PromptBuildOptions {
|
|
79
|
+
/** User's query */
|
|
80
|
+
query: string;
|
|
81
|
+
/** Search result chunks */
|
|
82
|
+
chunks: SearchResult[];
|
|
83
|
+
/** Generator model type */
|
|
84
|
+
modelType: GeneratorModelType;
|
|
85
|
+
/** Custom system prompt (optional) */
|
|
86
|
+
systemPrompt?: string;
|
|
87
|
+
/** Maximum context window tokens */
|
|
88
|
+
maxContextLength: number;
|
|
89
|
+
/** Tokens reserved for output */
|
|
90
|
+
reservedOutputTokens: number;
|
|
91
|
+
/** Include source attribution hint */
|
|
92
|
+
includeSourceAttribution?: boolean;
|
|
93
|
+
}
|
|
94
|
+
/**
|
|
95
|
+
* Result of prompt building
|
|
96
|
+
*/
|
|
97
|
+
export interface BuiltPrompt {
|
|
98
|
+
/** Complete prompt string */
|
|
99
|
+
prompt: string;
|
|
100
|
+
/** Estimated total tokens */
|
|
101
|
+
estimatedTokens: number;
|
|
102
|
+
/** Context metadata */
|
|
103
|
+
contextInfo: FormattedContext;
|
|
104
|
+
/** System prompt used (if any) */
|
|
105
|
+
systemPromptUsed?: string;
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Build a complete prompt for the generator model
|
|
109
|
+
*
|
|
110
|
+
* @param options - Prompt building options
|
|
111
|
+
* @returns Built prompt with metadata
|
|
112
|
+
*/
|
|
113
|
+
export declare function buildPrompt(options: PromptBuildOptions): BuiltPrompt;
|
|
114
|
+
/**
|
|
115
|
+
* Estimate token count for a string
|
|
116
|
+
* Uses a simple character-based heuristic (~4 chars per token for English)
|
|
117
|
+
*
|
|
118
|
+
* @param text - Text to estimate tokens for
|
|
119
|
+
* @returns Estimated token count
|
|
120
|
+
*/
|
|
121
|
+
export declare function estimateTokenCount(text: string): number;
|
|
122
|
+
/**
|
|
123
|
+
* Calculate available context budget
|
|
124
|
+
*
|
|
125
|
+
* @param maxContextLength - Maximum context window size
|
|
126
|
+
* @param reservedOutputTokens - Tokens reserved for generation
|
|
127
|
+
* @param promptOverhead - Tokens used by prompt formatting
|
|
128
|
+
* @returns Available tokens for context chunks
|
|
129
|
+
*/
|
|
130
|
+
export declare function calculateContextBudget(maxContextLength: number, reservedOutputTokens: number, promptOverhead?: number): number;
|
|
131
|
+
/**
|
|
132
|
+
* Get default stop sequences for a model type
|
|
133
|
+
*
|
|
134
|
+
* @param modelType - Generator model type
|
|
135
|
+
* @returns Array of stop sequences
|
|
136
|
+
*/
|
|
137
|
+
export declare function getDefaultStopSequences(modelType: GeneratorModelType): string[];
|
|
138
|
+
//# sourceMappingURL=prompt-templates.d.ts.map
|