rag-lite-ts 2.2.0 → 2.3.1

Files changed (100)
  1. package/README.md +88 -5
  2. package/dist/cjs/cli/indexer.js +73 -15
  3. package/dist/cjs/cli/search.js +77 -2
  4. package/dist/cjs/cli/ui-server.d.ts +5 -0
  5. package/dist/cjs/cli/ui-server.js +152 -0
  6. package/dist/cjs/cli.js +53 -7
  7. package/dist/cjs/core/abstract-generator.d.ts +97 -0
  8. package/dist/cjs/core/abstract-generator.js +222 -0
  9. package/dist/cjs/core/binary-index-format.js +53 -10
  10. package/dist/cjs/core/db.d.ts +56 -0
  11. package/dist/cjs/core/db.js +105 -0
  12. package/dist/cjs/core/generator-registry.d.ts +114 -0
  13. package/dist/cjs/core/generator-registry.js +280 -0
  14. package/dist/cjs/core/index.d.ts +4 -0
  15. package/dist/cjs/core/index.js +11 -0
  16. package/dist/cjs/core/ingestion.js +3 -0
  17. package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
  18. package/dist/cjs/core/knowledge-base-manager.js +256 -0
  19. package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
  20. package/dist/cjs/core/lazy-dependency-loader.js +111 -2
  21. package/dist/cjs/core/prompt-templates.d.ts +138 -0
  22. package/dist/cjs/core/prompt-templates.js +225 -0
  23. package/dist/cjs/core/response-generator.d.ts +132 -0
  24. package/dist/cjs/core/response-generator.js +69 -0
  25. package/dist/cjs/core/search-pipeline.js +1 -1
  26. package/dist/cjs/core/search.d.ts +72 -1
  27. package/dist/cjs/core/search.js +80 -7
  28. package/dist/cjs/core/types.d.ts +1 -0
  29. package/dist/cjs/core/vector-index-messages.d.ts +52 -0
  30. package/dist/cjs/core/vector-index-messages.js +5 -0
  31. package/dist/cjs/core/vector-index-worker.d.ts +6 -0
  32. package/dist/cjs/core/vector-index-worker.js +314 -0
  33. package/dist/cjs/core/vector-index.d.ts +45 -10
  34. package/dist/cjs/core/vector-index.js +279 -218
  35. package/dist/cjs/factories/generator-factory.d.ts +88 -0
  36. package/dist/cjs/factories/generator-factory.js +151 -0
  37. package/dist/cjs/factories/index.d.ts +1 -0
  38. package/dist/cjs/factories/index.js +5 -0
  39. package/dist/cjs/factories/ingestion-factory.js +3 -7
  40. package/dist/cjs/factories/search-factory.js +11 -0
  41. package/dist/cjs/index-manager.d.ts +23 -3
  42. package/dist/cjs/index-manager.js +84 -15
  43. package/dist/cjs/index.d.ts +11 -1
  44. package/dist/cjs/index.js +19 -1
  45. package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
  46. package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
  47. package/dist/cjs/text/generators/index.d.ts +10 -0
  48. package/dist/cjs/text/generators/index.js +10 -0
  49. package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
  50. package/dist/cjs/text/generators/instruct-generator.js +192 -0
  51. package/dist/esm/cli/indexer.js +73 -15
  52. package/dist/esm/cli/search.js +77 -2
  53. package/dist/esm/cli/ui-server.d.ts +5 -0
  54. package/dist/esm/cli/ui-server.js +152 -0
  55. package/dist/esm/cli.js +53 -7
  56. package/dist/esm/core/abstract-generator.d.ts +97 -0
  57. package/dist/esm/core/abstract-generator.js +222 -0
  58. package/dist/esm/core/binary-index-format.js +53 -10
  59. package/dist/esm/core/db.d.ts +56 -0
  60. package/dist/esm/core/db.js +105 -0
  61. package/dist/esm/core/generator-registry.d.ts +114 -0
  62. package/dist/esm/core/generator-registry.js +280 -0
  63. package/dist/esm/core/index.d.ts +4 -0
  64. package/dist/esm/core/index.js +11 -0
  65. package/dist/esm/core/ingestion.js +3 -0
  66. package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
  67. package/dist/esm/core/knowledge-base-manager.js +256 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +111 -2
  70. package/dist/esm/core/prompt-templates.d.ts +138 -0
  71. package/dist/esm/core/prompt-templates.js +225 -0
  72. package/dist/esm/core/response-generator.d.ts +132 -0
  73. package/dist/esm/core/response-generator.js +69 -0
  74. package/dist/esm/core/search-pipeline.js +1 -1
  75. package/dist/esm/core/search.d.ts +72 -1
  76. package/dist/esm/core/search.js +80 -7
  77. package/dist/esm/core/types.d.ts +1 -0
  78. package/dist/esm/core/vector-index-messages.d.ts +52 -0
  79. package/dist/esm/core/vector-index-messages.js +5 -0
  80. package/dist/esm/core/vector-index-worker.d.ts +6 -0
  81. package/dist/esm/core/vector-index-worker.js +314 -0
  82. package/dist/esm/core/vector-index.d.ts +45 -10
  83. package/dist/esm/core/vector-index.js +279 -218
  84. package/dist/esm/factories/generator-factory.d.ts +88 -0
  85. package/dist/esm/factories/generator-factory.js +151 -0
  86. package/dist/esm/factories/index.d.ts +1 -0
  87. package/dist/esm/factories/index.js +5 -0
  88. package/dist/esm/factories/ingestion-factory.js +3 -7
  89. package/dist/esm/factories/search-factory.js +11 -0
  90. package/dist/esm/index-manager.d.ts +23 -3
  91. package/dist/esm/index-manager.js +84 -15
  92. package/dist/esm/index.d.ts +11 -1
  93. package/dist/esm/index.js +19 -1
  94. package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
  95. package/dist/esm/text/generators/causal-lm-generator.js +197 -0
  96. package/dist/esm/text/generators/index.d.ts +10 -0
  97. package/dist/esm/text/generators/index.js +10 -0
  98. package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
  99. package/dist/esm/text/generators/instruct-generator.js +192 -0
  100. package/package.json +14 -7
package/dist/esm/core/lazy-dependency-loader.d.ts

@@ -8,6 +8,7 @@
  import '../dom-polyfills.js';
  import type { UniversalEmbedder } from './universal-embedder.js';
  import type { RerankFunction } from './interfaces.js';
+ import type { ResponseGenerator } from './response-generator.js';
  /**
  * Lazy loader for embedder implementations
  * Only loads the specific embedder type when needed
@@ -42,6 +43,42 @@ export declare class LazyEmbedderLoader
  multimodalEmbedders: number;
  };
  }
+ /**
+ * Lazy loader for response generator implementations
+ * Only loads the specific generator type when needed
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ export declare class LazyGeneratorLoader {
+ private static cache;
+ /**
+ * Lazily load and create an instruct generator (SmolLM2-Instruct)
+ * Only imports the module when generation is actually requested
+ */
+ static loadInstructGenerator(modelName: string, options?: any): Promise<ResponseGenerator>;
+ /**
+ * Lazily load and create a causal LM generator (DistilGPT2)
+ * Only imports the module when generation is actually requested
+ */
+ static loadCausalLMGenerator(modelName: string, options?: any): Promise<ResponseGenerator>;
+ /**
+ * Check if a generator is already loaded in cache
+ */
+ static isGeneratorLoaded(modelName: string, modelType: 'instruct' | 'causal-lm'): boolean;
+ /**
+ * Remove a generator from the cache (called when generator is cleaned up)
+ */
+ static removeGeneratorFromCache(modelName: string, modelType: 'instruct' | 'causal-lm'): void;
+ /**
+ * Get statistics about loaded generators
+ */
+ static getLoadingStats(): {
+ loadedGenerators: string[];
+ totalLoaded: number;
+ instructGenerators: number;
+ causalLMGenerators: number;
+ };
+ }
  /**
  * Lazy loader for reranking implementations
  * Only loads the specific reranker type when needed
@@ -107,6 +144,11 @@ export declare class LazyMultimodalLoader
  * Provides a single entry point for dependency management
  */
  export declare class LazyDependencyManager {
+ /**
+ * Load response generator based on model type with lazy loading
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ static loadGenerator(modelName: string, modelType: 'instruct' | 'causal-lm', options?: any): Promise<ResponseGenerator>;
  /**
  * Load embedder based on model type with lazy loading
  */
@@ -121,6 +163,7 @@ export declare class LazyDependencyManager
  static getLoadingStatistics(): {
  embedders: ReturnType<typeof LazyEmbedderLoader.getLoadingStats>;
  rerankers: ReturnType<typeof LazyRerankerLoader.getLoadingStats>;
+ generators: ReturnType<typeof LazyGeneratorLoader.getLoadingStats>;
  multimodal: ReturnType<typeof LazyMultimodalLoader.getMultimodalLoadingStatus>;
  totalModulesLoaded: number;
  memoryImpact: 'low' | 'medium' | 'high';
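
Together these declarations define the public surface of the experimental generator loader. A minimal usage sketch, assuming the package entry point re-exports LazyDependencyManager and that ResponseGenerator exposes a generate() method (neither the re-export nor that method appears in this hunk):

```ts
// Hypothetical usage — import path, model id, and generate() are assumptions.
import { LazyDependencyManager } from 'rag-lite-ts';

async function answer(prompt: string): Promise<string> {
  // No generator module is imported or downloaded until this first call.
  const generator = await LazyDependencyManager.loadGenerator(
    'HuggingFaceTB/SmolLM2-360M-Instruct', // assumed model id
    'instruct'
  );
  return generator.generate(prompt); // assumed ResponseGenerator method
}
```
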
package/dist/esm/core/lazy-dependency-loader.js

@@ -149,6 +149,99 @@ export class LazyEmbedderLoader
  }
  }
  // =============================================================================
+ // LAZY GENERATOR LOADING
+ // =============================================================================
+ /**
+ * Lazy loader for response generator implementations
+ * Only loads the specific generator type when needed
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ export class LazyGeneratorLoader {
+ static cache = LazyLoadingCache.getInstance();
+ /**
+ * Lazily load and create an instruct generator (SmolLM2-Instruct)
+ * Only imports the module when generation is actually requested
+ */
+ static async loadInstructGenerator(modelName, options = {}) {
+ const cacheKey = `generator:instruct:${modelName}`;
+ return this.cache.getOrLoad(cacheKey, async () => {
+ try {
+ console.log(`🔄 [EXPERIMENTAL] Lazy loading instruct generator: ${modelName}`);
+ // Dynamic import - only loaded when generation is requested
+ const { InstructGenerator } = await import('../text/generators/instruct-generator.js');
+ const generator = new InstructGenerator(modelName, options);
+ await generator.loadModel();
+ console.log(`✅ Instruct generator loaded: ${modelName}`);
+ return generator;
+ }
+ catch (error) {
+ const enhancedError = createError.model(`Failed to lazy load instruct generator '${modelName}': ${error instanceof Error ? error.message : 'Unknown error'}`);
+ handleError(enhancedError, 'LazyGeneratorLoader', {
+ severity: ErrorSeverity.ERROR,
+ category: ErrorCategory.MODEL
+ });
+ throw enhancedError;
+ }
+ });
+ }
+ /**
+ * Lazily load and create a causal LM generator (DistilGPT2)
+ * Only imports the module when generation is actually requested
+ */
+ static async loadCausalLMGenerator(modelName, options = {}) {
+ const cacheKey = `generator:causal-lm:${modelName}`;
+ return this.cache.getOrLoad(cacheKey, async () => {
+ try {
+ console.log(`🔄 [EXPERIMENTAL] Lazy loading causal LM generator: ${modelName}`);
+ // Dynamic import - only loaded when generation is requested
+ const { CausalLMGenerator } = await import('../text/generators/causal-lm-generator.js');
+ const generator = new CausalLMGenerator(modelName, options);
+ await generator.loadModel();
+ console.log(`✅ Causal LM generator loaded: ${modelName}`);
+ return generator;
+ }
+ catch (error) {
+ const enhancedError = createError.model(`Failed to lazy load causal LM generator '${modelName}': ${error instanceof Error ? error.message : 'Unknown error'}`);
+ handleError(enhancedError, 'LazyGeneratorLoader', {
+ severity: ErrorSeverity.ERROR,
+ category: ErrorCategory.MODEL
+ });
+ throw enhancedError;
+ }
+ });
+ }
+ /**
+ * Check if a generator is already loaded in cache
+ */
+ static isGeneratorLoaded(modelName, modelType) {
+ const cacheKey = `generator:${modelType}:${modelName}`;
+ return this.cache.getLoadedModules().includes(cacheKey);
+ }
+ /**
+ * Remove a generator from the cache (called when generator is cleaned up)
+ */
+ static removeGeneratorFromCache(modelName, modelType) {
+ const cacheKey = `generator:${modelType}:${modelName}`;
+ this.cache.remove(cacheKey);
+ console.log(`🧹 Removed generator from cache: ${cacheKey}`);
+ }
+ /**
+ * Get statistics about loaded generators
+ */
+ static getLoadingStats() {
+ const loadedModules = this.cache.getLoadedModules().filter(key => key.startsWith('generator:'));
+ const instructGenerators = loadedModules.filter(key => key.includes(':instruct:')).length;
+ const causalLMGenerators = loadedModules.filter(key => key.includes(':causal-lm:')).length;
+ return {
+ loadedGenerators: loadedModules,
+ totalLoaded: loadedModules.length,
+ instructGenerators,
+ causalLMGenerators
+ };
+ }
+ }
+ // =============================================================================
  // LAZY RERANKER LOADING
  // =============================================================================
  /**
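
The implementation keys every generator in the shared LazyLoadingCache as `generator:<modelType>:<modelName>`, so repeated loads of the same model are deduplicated and the bookkeeping helpers all derive from the same key. A sketch of the implied contract (import path and model name are placeholders):

```ts
// Placeholder import path and model name; illustrates the cache-key contract only.
import { LazyGeneratorLoader } from 'rag-lite-ts';

await LazyGeneratorLoader.loadInstructGenerator('my-model');    // dynamic import + loadModel()
await LazyGeneratorLoader.loadInstructGenerator('my-model');    // served from cache, no reload

LazyGeneratorLoader.isGeneratorLoaded('my-model', 'instruct');  // true  (key: generator:instruct:my-model)
LazyGeneratorLoader.isGeneratorLoaded('my-model', 'causal-lm'); // false (different key)

LazyGeneratorLoader.removeGeneratorFromCache('my-model', 'instruct');
LazyGeneratorLoader.isGeneratorLoaded('my-model', 'instruct');  // false again
```
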
@@ -332,6 +425,20 @@ export class LazyMultimodalLoader
  * Provides a single entry point for dependency management
  */
  export class LazyDependencyManager {
+ /**
+ * Load response generator based on model type with lazy loading
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ static async loadGenerator(modelName, modelType, options = {}) {
+ switch (modelType) {
+ case 'instruct':
+ return LazyGeneratorLoader.loadInstructGenerator(modelName, options);
+ case 'causal-lm':
+ return LazyGeneratorLoader.loadCausalLMGenerator(modelName, options);
+ default:
+ throw createError.validation(`Unsupported generator model type for lazy loading: ${modelType}`);
+ }
+ }
  /**
  * Load embedder based on model type with lazy loading
  */
@@ -367,19 +474,21 @@ export class LazyDependencyManager
  static getLoadingStatistics() {
  const embedderStats = LazyEmbedderLoader.getLoadingStats();
  const rerankerStats = LazyRerankerLoader.getLoadingStats();
+ const generatorStats = LazyGeneratorLoader.getLoadingStats();
  const multimodalStats = LazyMultimodalLoader.getMultimodalLoadingStatus();
- const totalModules = embedderStats.totalLoaded + rerankerStats.totalLoaded + multimodalStats.loadedProcessors.length;
+ const totalModules = embedderStats.totalLoaded + rerankerStats.totalLoaded + generatorStats.totalLoaded + multimodalStats.loadedProcessors.length;
  // Estimate memory impact based on loaded modules
  let memoryImpact = 'low';
  if (embedderStats.multimodalEmbedders > 0 || multimodalStats.imageToTextLoaded) {
  memoryImpact = 'high';
  }
- else if (totalModules > 2) {
+ else if (totalModules > 2 || generatorStats.totalLoaded > 0) {
  memoryImpact = 'medium';
  }
  return {
  embedders: embedderStats,
  rerankers: rerankerStats,
+ generators: generatorStats,
  multimodal: multimodalStats,
  totalModulesLoaded: totalModules,
  memoryImpact
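
Generators now count toward the module total, and loading any generator raises the memory estimate to at least 'medium' (a multimodal embedder or image-to-text processor still takes precedence with 'high'). A sketch of reading the aggregated report, assuming the same entry-point re-export as above:

```ts
// Hypothetical inspection of the aggregated loading statistics.
import { LazyDependencyManager } from 'rag-lite-ts';

const stats = LazyDependencyManager.getLoadingStatistics();
console.log(stats.generators.totalLoaded); // e.g. 1 after a single loadGenerator() call
console.log(stats.totalModulesLoaded);     // embedders + rerankers + generators + multimodal processors
console.log(stats.memoryImpact);           // 'medium' once any generator is loaded (unless already 'high')
```
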
package/dist/esm/core/prompt-templates.d.ts

@@ -0,0 +1,138 @@
+ /**
+ * CORE MODULE — Prompt Templates for RAG Response Generation
+ *
+ * Provides prompt engineering utilities for different generator model types.
+ * Handles context formatting, token budget management, and system prompts.
+ *
+ * PROMPT STRATEGIES:
+ * - Instruct models: Use chat template with system/user/assistant roles
+ * - Causal LM models: Use simple document + question format
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ import type { SearchResult } from './types.js';
+ import type { GeneratorModelType } from './response-generator.js';
+ /**
+ * Default system prompt for instruct models
+ * Emphasizes grounded responses using only provided context
+ */
+ export declare const DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n3. Do not make up information or use external knowledge\n4. Be concise and direct in your response\n5. If the context is incomplete or unclear, acknowledge this limitation";
+ /**
+ * Default system prompt for RAG with source attribution
+ */
+ export declare const DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. When possible, mention which document the information comes from\n3. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n4. Do not make up information or use external knowledge\n5. Be concise and direct in your response";
+ /**
+ * SmolLM2 chat template format
+ * Uses <|im_start|> and <|im_end|> tokens
+ */
+ export declare const SMOLLM2_CHAT_TEMPLATE: {
+ systemStart: string;
+ systemEnd: string;
+ userStart: string;
+ userEnd: string;
+ assistantStart: string;
+ assistantEnd: string;
+ endOfText: string;
+ };
+ /**
+ * Options for formatting context chunks
+ */
+ export interface ContextFormattingOptions {
+ /** Maximum tokens available for context */
+ maxContextTokens: number;
+ /** Include document titles/sources */
+ includeDocumentInfo?: boolean;
+ /** Include relevance scores */
+ includeScores?: boolean;
+ /** Separator between chunks */
+ chunkSeparator?: string;
+ /** Token estimation function (chars to tokens ratio) */
+ tokenEstimationRatio?: number;
+ }
+ /**
+ * Result of context formatting
+ */
+ export interface FormattedContext {
+ /** Formatted context string */
+ text: string;
+ /** Estimated token count */
+ estimatedTokens: number;
+ /** Number of chunks included */
+ chunksIncluded: number;
+ /** Total chunks available */
+ totalChunks: number;
+ /** Whether context was truncated */
+ truncated: boolean;
+ }
+ /**
+ * Format search result chunks into context string for the prompt
+ *
+ * @param chunks - Search result chunks to format
+ * @param options - Formatting options
+ * @returns Formatted context with metadata
+ */
+ export declare function formatContextChunks(chunks: SearchResult[], options: ContextFormattingOptions): FormattedContext;
+ /**
+ * Options for building the complete prompt
+ */
+ export interface PromptBuildOptions {
+ /** User's query */
+ query: string;
+ /** Search result chunks */
+ chunks: SearchResult[];
+ /** Generator model type */
+ modelType: GeneratorModelType;
+ /** Custom system prompt (optional) */
+ systemPrompt?: string;
+ /** Maximum context window tokens */
+ maxContextLength: number;
+ /** Tokens reserved for output */
+ reservedOutputTokens: number;
+ /** Include source attribution hint */
+ includeSourceAttribution?: boolean;
+ }
+ /**
+ * Result of prompt building
+ */
+ export interface BuiltPrompt {
+ /** Complete prompt string */
+ prompt: string;
+ /** Estimated total tokens */
+ estimatedTokens: number;
+ /** Context metadata */
+ contextInfo: FormattedContext;
+ /** System prompt used (if any) */
+ systemPromptUsed?: string;
+ }
+ /**
+ * Build a complete prompt for the generator model
+ *
+ * @param options - Prompt building options
+ * @returns Built prompt with metadata
+ */
+ export declare function buildPrompt(options: PromptBuildOptions): BuiltPrompt;
+ /**
+ * Estimate token count for a string
+ * Uses a simple character-based heuristic (~4 chars per token for English)
+ *
+ * @param text - Text to estimate tokens for
+ * @returns Estimated token count
+ */
+ export declare function estimateTokenCount(text: string): number;
+ /**
+ * Calculate available context budget
+ *
+ * @param maxContextLength - Maximum context window size
+ * @param reservedOutputTokens - Tokens reserved for generation
+ * @param promptOverhead - Tokens used by prompt formatting
+ * @returns Available tokens for context chunks
+ */
+ export declare function calculateContextBudget(maxContextLength: number, reservedOutputTokens: number, promptOverhead?: number): number;
+ /**
+ * Get default stop sequences for a model type
+ *
+ * @param modelType - Generator model type
+ * @returns Array of stop sequences
+ */
+ export declare function getDefaultStopSequences(modelType: GeneratorModelType): string[];
+ //# sourceMappingURL=prompt-templates.d.ts.map
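
These declarations are enough to drive the builder end to end. A minimal sketch, assuming the package entry point re-exports buildPrompt and SearchResult (the diff touches core/index.d.ts, which suggests new re-exports, but their contents are not shown here):

```ts
// Hypothetical usage — import paths and numeric values are assumptions.
import { buildPrompt } from 'rag-lite-ts';
import type { SearchResult } from 'rag-lite-ts';

declare const searchResults: SearchResult[]; // chunks from a prior search call

const built = buildPrompt({
  query: 'What ports does the service listen on?',
  chunks: searchResults,
  modelType: 'instruct',
  maxContextLength: 2048,    // model context window
  reservedOutputTokens: 256, // left free for the generated answer
  includeSourceAttribution: true
});

if (built.contextInfo.truncated) {
  console.warn(`Only ${built.contextInfo.chunksIncluded} of ${built.contextInfo.totalChunks} chunks fit the budget`);
}
```
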
package/dist/esm/core/prompt-templates.js

@@ -0,0 +1,225 @@
+ /**
+ * CORE MODULE — Prompt Templates for RAG Response Generation
+ *
+ * Provides prompt engineering utilities for different generator model types.
+ * Handles context formatting, token budget management, and system prompts.
+ *
+ * PROMPT STRATEGIES:
+ * - Instruct models: Use chat template with system/user/assistant roles
+ * - Causal LM models: Use simple document + question format
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ // =============================================================================
+ // DEFAULT PROMPTS
+ // =============================================================================
+ /**
+ * Default system prompt for instruct models
+ * Emphasizes grounded responses using only provided context
+ */
+ export const DEFAULT_SYSTEM_PROMPT = `You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:
+
+ 1. Answer ONLY using information found in the context documents
+ 2. If the answer cannot be found in the context, say "I cannot find this information in the provided documents"
+ 3. Do not make up information or use external knowledge
+ 4. Be concise and direct in your response
+ 5. If the context is incomplete or unclear, acknowledge this limitation`;
+ /**
+ * Default system prompt for RAG with source attribution
+ */
+ export const DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION = `You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:
+
+ 1. Answer ONLY using information found in the context documents
+ 2. When possible, mention which document the information comes from
+ 3. If the answer cannot be found in the context, say "I cannot find this information in the provided documents"
+ 4. Do not make up information or use external knowledge
+ 5. Be concise and direct in your response`;
+ // =============================================================================
+ // CHAT TEMPLATES
+ // =============================================================================
+ /**
+ * SmolLM2 chat template format
+ * Uses <|im_start|> and <|im_end|> tokens
+ */
+ export const SMOLLM2_CHAT_TEMPLATE = {
+ systemStart: '<|im_start|>system\n',
+ systemEnd: '<|im_end|>\n',
+ userStart: '<|im_start|>user\n',
+ userEnd: '<|im_end|>\n',
+ assistantStart: '<|im_start|>assistant\n',
+ assistantEnd: '<|im_end|>',
+ endOfText: '<|endoftext|>'
+ };
+ /**
+ * Format search result chunks into context string for the prompt
+ *
+ * @param chunks - Search result chunks to format
+ * @param options - Formatting options
+ * @returns Formatted context with metadata
+ */
+ export function formatContextChunks(chunks, options) {
+ const { maxContextTokens, includeDocumentInfo = true, includeScores = false, chunkSeparator = '\n---\n', tokenEstimationRatio = 4 // ~4 chars per token for English
+ } = options;
+ const maxChars = maxContextTokens * tokenEstimationRatio;
+ let currentChars = 0;
+ const includedChunks = [];
+ let truncated = false;
+ for (let i = 0; i < chunks.length; i++) {
+ const chunk = chunks[i];
+ // Format this chunk
+ let chunkText = '';
+ if (includeDocumentInfo) {
+ chunkText += `[Document ${i + 1}: ${chunk.document.title}]`;
+ if (includeScores) {
+ chunkText += ` (relevance: ${(chunk.score * 100).toFixed(1)}%)`;
+ }
+ chunkText += '\n';
+ }
+ chunkText += chunk.content;
+ // Check if adding this chunk would exceed budget
+ const chunkChars = chunkText.length + (includedChunks.length > 0 ? chunkSeparator.length : 0);
+ if (currentChars + chunkChars > maxChars) {
+ // Check if we can fit a truncated version of this chunk
+ const remainingChars = maxChars - currentChars - (includedChunks.length > 0 ? chunkSeparator.length : 0);
+ if (remainingChars > 100 && includedChunks.length === 0) {
+ // Truncate the first chunk if it's the only option
+ chunkText = chunkText.substring(0, remainingChars - 20) + '\n[Content truncated...]';
+ includedChunks.push(chunkText);
+ currentChars += chunkText.length;
+ }
+ truncated = true;
+ break;
+ }
+ includedChunks.push(chunkText);
+ currentChars += chunkChars;
+ }
+ const text = includedChunks.join(chunkSeparator);
+ const estimatedTokens = Math.ceil(text.length / tokenEstimationRatio);
+ return {
+ text,
+ estimatedTokens,
+ chunksIncluded: includedChunks.length,
+ totalChunks: chunks.length,
+ truncated
+ };
+ }
+ /**
+ * Build a complete prompt for the generator model
+ *
+ * @param options - Prompt building options
+ * @returns Built prompt with metadata
+ */
+ export function buildPrompt(options) {
+ const { query, chunks, modelType, systemPrompt, maxContextLength, reservedOutputTokens, includeSourceAttribution = false } = options;
+ // Calculate available tokens for context
+ const promptOverhead = modelType === 'instruct' ? 150 : 50; // Tokens for formatting
+ const queryTokens = Math.ceil(query.length / 4);
+ const availableContextTokens = maxContextLength - reservedOutputTokens - promptOverhead - queryTokens;
+ // Format context chunks
+ const contextInfo = formatContextChunks(chunks, {
+ maxContextTokens: availableContextTokens,
+ includeDocumentInfo: true,
+ includeScores: false
+ });
+ // Build prompt based on model type
+ let prompt;
+ let systemPromptUsed;
+ if (modelType === 'instruct') {
+ prompt = buildInstructPrompt(query, contextInfo.text, systemPrompt, includeSourceAttribution);
+ systemPromptUsed = systemPrompt || (includeSourceAttribution ? DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION : DEFAULT_SYSTEM_PROMPT);
+ }
+ else {
+ prompt = buildCausalLMPrompt(query, contextInfo.text);
+ }
+ const estimatedTokens = Math.ceil(prompt.length / 4);
+ return {
+ prompt,
+ estimatedTokens,
+ contextInfo,
+ systemPromptUsed
+ };
+ }
+ /**
+ * Build prompt for instruct models (SmolLM2-Instruct)
+ * Uses chat template format with system/user/assistant roles
+ */
+ function buildInstructPrompt(query, context, customSystemPrompt, includeSourceAttribution = false) {
+ const systemPrompt = customSystemPrompt ||
+ (includeSourceAttribution ? DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION : DEFAULT_SYSTEM_PROMPT);
+ const template = SMOLLM2_CHAT_TEMPLATE;
+ const userMessage = `Context:
+ ${context}
+
+ Question: ${query}
+
+ Answer based only on the context above:`;
+ return `${template.systemStart}${systemPrompt}${template.systemEnd}${template.userStart}${userMessage}${template.userEnd}${template.assistantStart}`;
+ }
+ /**
+ * Build prompt for causal LM models (DistilGPT2)
+ * Uses simple document + question format without roles
+ */
+ function buildCausalLMPrompt(query, context) {
+ return `The following documents contain information to answer the question.
+
+ Documents:
+ ${context}
+
+ Based on the documents above, answer this question: ${query}
+
+ Answer:`;
+ }
+ // =============================================================================
+ // TOKEN ESTIMATION
+ // =============================================================================
+ /**
+ * Estimate token count for a string
+ * Uses a simple character-based heuristic (~4 chars per token for English)
+ *
+ * @param text - Text to estimate tokens for
+ * @returns Estimated token count
+ */
+ export function estimateTokenCount(text) {
+ // Simple heuristic: ~4 characters per token for English text
+ // This is a rough approximation; actual tokenization varies by model
+ return Math.ceil(text.length / 4);
+ }
+ /**
+ * Calculate available context budget
+ *
+ * @param maxContextLength - Maximum context window size
+ * @param reservedOutputTokens - Tokens reserved for generation
+ * @param promptOverhead - Tokens used by prompt formatting
+ * @returns Available tokens for context chunks
+ */
+ export function calculateContextBudget(maxContextLength, reservedOutputTokens, promptOverhead = 100) {
+ return Math.max(0, maxContextLength - reservedOutputTokens - promptOverhead);
+ }
+ // =============================================================================
+ // STOP SEQUENCES
+ // =============================================================================
+ /**
+ * Get default stop sequences for a model type
+ *
+ * @param modelType - Generator model type
+ * @returns Array of stop sequences
+ */
+ export function getDefaultStopSequences(modelType) {
+ if (modelType === 'instruct') {
+ return [
+ SMOLLM2_CHAT_TEMPLATE.assistantEnd,
+ SMOLLM2_CHAT_TEMPLATE.endOfText,
+ '<|im_start|>',
+ '\n\nQuestion:',
+ '\n\nContext:'
+ ];
+ }
+ // Causal LM stop sequences
+ return [
+ '\n\nQuestion:',
+ '\n\nDocuments:',
+ '\n\n---',
+ '<|endoftext|>'
+ ];
+ }
+ //# sourceMappingURL=prompt-templates.js.map
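
The budget arithmetic in buildPrompt() is easy to check by hand. A sketch with hypothetical numbers: a 2048-token window, 256 reserved output tokens, the 150-token instruct overhead, and an 80-character query:

```ts
// Mirrors the instruct-model budget math above (all input values hypothetical).
const maxContextLength = 2048;
const reservedOutputTokens = 256;
const promptOverhead = 150;            // instruct formatting overhead used above
const queryTokens = Math.ceil(80 / 4); // 80-char query ≈ 20 tokens
const availableContextTokens =
  maxContextLength - reservedOutputTokens - promptOverhead - queryTokens; // 1622
const maxChars = availableContextTokens * 4; // ≈ 6488 chars of chunk text may be included
```
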