rag-lite-ts 2.2.0 → 2.3.1

Files changed (100)
  1. package/README.md +88 -5
  2. package/dist/cjs/cli/indexer.js +73 -15
  3. package/dist/cjs/cli/search.js +77 -2
  4. package/dist/cjs/cli/ui-server.d.ts +5 -0
  5. package/dist/cjs/cli/ui-server.js +152 -0
  6. package/dist/cjs/cli.js +53 -7
  7. package/dist/cjs/core/abstract-generator.d.ts +97 -0
  8. package/dist/cjs/core/abstract-generator.js +222 -0
  9. package/dist/cjs/core/binary-index-format.js +53 -10
  10. package/dist/cjs/core/db.d.ts +56 -0
  11. package/dist/cjs/core/db.js +105 -0
  12. package/dist/cjs/core/generator-registry.d.ts +114 -0
  13. package/dist/cjs/core/generator-registry.js +280 -0
  14. package/dist/cjs/core/index.d.ts +4 -0
  15. package/dist/cjs/core/index.js +11 -0
  16. package/dist/cjs/core/ingestion.js +3 -0
  17. package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
  18. package/dist/cjs/core/knowledge-base-manager.js +256 -0
  19. package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
  20. package/dist/cjs/core/lazy-dependency-loader.js +111 -2
  21. package/dist/cjs/core/prompt-templates.d.ts +138 -0
  22. package/dist/cjs/core/prompt-templates.js +225 -0
  23. package/dist/cjs/core/response-generator.d.ts +132 -0
  24. package/dist/cjs/core/response-generator.js +69 -0
  25. package/dist/cjs/core/search-pipeline.js +1 -1
  26. package/dist/cjs/core/search.d.ts +72 -1
  27. package/dist/cjs/core/search.js +80 -7
  28. package/dist/cjs/core/types.d.ts +1 -0
  29. package/dist/cjs/core/vector-index-messages.d.ts +52 -0
  30. package/dist/cjs/core/vector-index-messages.js +5 -0
  31. package/dist/cjs/core/vector-index-worker.d.ts +6 -0
  32. package/dist/cjs/core/vector-index-worker.js +314 -0
  33. package/dist/cjs/core/vector-index.d.ts +45 -10
  34. package/dist/cjs/core/vector-index.js +279 -218
  35. package/dist/cjs/factories/generator-factory.d.ts +88 -0
  36. package/dist/cjs/factories/generator-factory.js +151 -0
  37. package/dist/cjs/factories/index.d.ts +1 -0
  38. package/dist/cjs/factories/index.js +5 -0
  39. package/dist/cjs/factories/ingestion-factory.js +3 -7
  40. package/dist/cjs/factories/search-factory.js +11 -0
  41. package/dist/cjs/index-manager.d.ts +23 -3
  42. package/dist/cjs/index-manager.js +84 -15
  43. package/dist/cjs/index.d.ts +11 -1
  44. package/dist/cjs/index.js +19 -1
  45. package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
  46. package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
  47. package/dist/cjs/text/generators/index.d.ts +10 -0
  48. package/dist/cjs/text/generators/index.js +10 -0
  49. package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
  50. package/dist/cjs/text/generators/instruct-generator.js +192 -0
  51. package/dist/esm/cli/indexer.js +73 -15
  52. package/dist/esm/cli/search.js +77 -2
  53. package/dist/esm/cli/ui-server.d.ts +5 -0
  54. package/dist/esm/cli/ui-server.js +152 -0
  55. package/dist/esm/cli.js +53 -7
  56. package/dist/esm/core/abstract-generator.d.ts +97 -0
  57. package/dist/esm/core/abstract-generator.js +222 -0
  58. package/dist/esm/core/binary-index-format.js +53 -10
  59. package/dist/esm/core/db.d.ts +56 -0
  60. package/dist/esm/core/db.js +105 -0
  61. package/dist/esm/core/generator-registry.d.ts +114 -0
  62. package/dist/esm/core/generator-registry.js +280 -0
  63. package/dist/esm/core/index.d.ts +4 -0
  64. package/dist/esm/core/index.js +11 -0
  65. package/dist/esm/core/ingestion.js +3 -0
  66. package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
  67. package/dist/esm/core/knowledge-base-manager.js +256 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +111 -2
  70. package/dist/esm/core/prompt-templates.d.ts +138 -0
  71. package/dist/esm/core/prompt-templates.js +225 -0
  72. package/dist/esm/core/response-generator.d.ts +132 -0
  73. package/dist/esm/core/response-generator.js +69 -0
  74. package/dist/esm/core/search-pipeline.js +1 -1
  75. package/dist/esm/core/search.d.ts +72 -1
  76. package/dist/esm/core/search.js +80 -7
  77. package/dist/esm/core/types.d.ts +1 -0
  78. package/dist/esm/core/vector-index-messages.d.ts +52 -0
  79. package/dist/esm/core/vector-index-messages.js +5 -0
  80. package/dist/esm/core/vector-index-worker.d.ts +6 -0
  81. package/dist/esm/core/vector-index-worker.js +314 -0
  82. package/dist/esm/core/vector-index.d.ts +45 -10
  83. package/dist/esm/core/vector-index.js +279 -218
  84. package/dist/esm/factories/generator-factory.d.ts +88 -0
  85. package/dist/esm/factories/generator-factory.js +151 -0
  86. package/dist/esm/factories/index.d.ts +1 -0
  87. package/dist/esm/factories/index.js +5 -0
  88. package/dist/esm/factories/ingestion-factory.js +3 -7
  89. package/dist/esm/factories/search-factory.js +11 -0
  90. package/dist/esm/index-manager.d.ts +23 -3
  91. package/dist/esm/index-manager.js +84 -15
  92. package/dist/esm/index.d.ts +11 -1
  93. package/dist/esm/index.js +19 -1
  94. package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
  95. package/dist/esm/text/generators/causal-lm-generator.js +197 -0
  96. package/dist/esm/text/generators/index.d.ts +10 -0
  97. package/dist/esm/text/generators/index.js +10 -0
  98. package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
  99. package/dist/esm/text/generators/instruct-generator.js +192 -0
  100. package/package.json +14 -7
package/dist/esm/core/lazy-dependency-loader.d.ts

@@ -8,6 +8,7 @@
  import '../dom-polyfills.js';
  import type { UniversalEmbedder } from './universal-embedder.js';
  import type { RerankFunction } from './interfaces.js';
+ import type { ResponseGenerator } from './response-generator.js';
  /**
  * Lazy loader for embedder implementations
  * Only loads the specific embedder type when needed
@@ -42,6 +43,42 @@ export declare class LazyEmbedderLoader
  multimodalEmbedders: number;
  };
  }
+ /**
+ * Lazy loader for response generator implementations
+ * Only loads the specific generator type when needed
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ export declare class LazyGeneratorLoader {
+ private static cache;
+ /**
+ * Lazily load and create an instruct generator (SmolLM2-Instruct)
+ * Only imports the module when generation is actually requested
+ */
+ static loadInstructGenerator(modelName: string, options?: any): Promise<ResponseGenerator>;
+ /**
+ * Lazily load and create a causal LM generator (DistilGPT2)
+ * Only imports the module when generation is actually requested
+ */
+ static loadCausalLMGenerator(modelName: string, options?: any): Promise<ResponseGenerator>;
+ /**
+ * Check if a generator is already loaded in cache
+ */
+ static isGeneratorLoaded(modelName: string, modelType: 'instruct' | 'causal-lm'): boolean;
+ /**
+ * Remove a generator from the cache (called when generator is cleaned up)
+ */
+ static removeGeneratorFromCache(modelName: string, modelType: 'instruct' | 'causal-lm'): void;
+ /**
+ * Get statistics about loaded generators
+ */
+ static getLoadingStats(): {
+ loadedGenerators: string[];
+ totalLoaded: number;
+ instructGenerators: number;
+ causalLMGenerators: number;
+ };
+ }
  /**
  * Lazy loader for reranking implementations
  * Only loads the specific reranker type when needed
@@ -107,6 +144,11 @@ export declare class LazyMultimodalLoader
  * Provides a single entry point for dependency management
  */
  export declare class LazyDependencyManager {
+ /**
+ * Load response generator based on model type with lazy loading
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ static loadGenerator(modelName: string, modelType: 'instruct' | 'causal-lm', options?: any): Promise<ResponseGenerator>;
  /**
  * Load embedder based on model type with lazy loading
  */
@@ -121,6 +163,7 @@ export declare class LazyDependencyManager
  static getLoadingStatistics(): {
  embedders: ReturnType<typeof LazyEmbedderLoader.getLoadingStats>;
  rerankers: ReturnType<typeof LazyRerankerLoader.getLoadingStats>;
+ generators: ReturnType<typeof LazyGeneratorLoader.getLoadingStats>;
  multimodal: ReturnType<typeof LazyMultimodalLoader.getMultimodalLoadingStatus>;
  totalModulesLoaded: number;
  memoryImpact: 'low' | 'medium' | 'high';
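
Together these declarations define the public surface of the experimental generator loader. A minimal usage sketch, assuming the package entry point re-exports LazyDependencyManager and that ResponseGenerator exposes a generate() method (neither the re-export nor that method appears in this hunk):

```ts
// Hypothetical usage — import path, model id, and generate() are assumptions.
import { LazyDependencyManager } from 'rag-lite-ts';

async function answer(prompt: string): Promise<string> {
  // No generator module is imported or downloaded until this first call.
  const generator = await LazyDependencyManager.loadGenerator(
    'HuggingFaceTB/SmolLM2-360M-Instruct', // assumed model id
    'instruct'
  );
  return generator.generate(prompt); // assumed ResponseGenerator method
}
```
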
package/dist/esm/core/lazy-dependency-loader.js

@@ -149,6 +149,99 @@ export class LazyEmbedderLoader
  }
  }
  // =============================================================================
+ // LAZY GENERATOR LOADING
+ // =============================================================================
+ /**
+ * Lazy loader for response generator implementations
+ * Only loads the specific generator type when needed
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ export class LazyGeneratorLoader {
+ static cache = LazyLoadingCache.getInstance();
+ /**
+ * Lazily load and create an instruct generator (SmolLM2-Instruct)
+ * Only imports the module when generation is actually requested
+ */
+ static async loadInstructGenerator(modelName, options = {}) {
+ const cacheKey = `generator:instruct:${modelName}`;
+ return this.cache.getOrLoad(cacheKey, async () => {
+ try {
+ console.log(`🔄 [EXPERIMENTAL] Lazy loading instruct generator: ${modelName}`);
+ // Dynamic import - only loaded when generation is requested
+ const { InstructGenerator } = await import('../text/generators/instruct-generator.js');
+ const generator = new InstructGenerator(modelName, options);
+ await generator.loadModel();
+ console.log(`✅ Instruct generator loaded: ${modelName}`);
+ return generator;
+ }
+ catch (error) {
+ const enhancedError = createError.model(`Failed to lazy load instruct generator '${modelName}': ${error instanceof Error ? error.message : 'Unknown error'}`);
+ handleError(enhancedError, 'LazyGeneratorLoader', {
+ severity: ErrorSeverity.ERROR,
+ category: ErrorCategory.MODEL
+ });
+ throw enhancedError;
+ }
+ });
+ }
+ /**
+ * Lazily load and create a causal LM generator (DistilGPT2)
+ * Only imports the module when generation is actually requested
+ */
+ static async loadCausalLMGenerator(modelName, options = {}) {
+ const cacheKey = `generator:causal-lm:${modelName}`;
+ return this.cache.getOrLoad(cacheKey, async () => {
+ try {
+ console.log(`🔄 [EXPERIMENTAL] Lazy loading causal LM generator: ${modelName}`);
+ // Dynamic import - only loaded when generation is requested
+ const { CausalLMGenerator } = await import('../text/generators/causal-lm-generator.js');
+ const generator = new CausalLMGenerator(modelName, options);
+ await generator.loadModel();
+ console.log(`✅ Causal LM generator loaded: ${modelName}`);
+ return generator;
+ }
+ catch (error) {
+ const enhancedError = createError.model(`Failed to lazy load causal LM generator '${modelName}': ${error instanceof Error ? error.message : 'Unknown error'}`);
+ handleError(enhancedError, 'LazyGeneratorLoader', {
+ severity: ErrorSeverity.ERROR,
+ category: ErrorCategory.MODEL
+ });
+ throw enhancedError;
+ }
+ });
+ }
+ /**
+ * Check if a generator is already loaded in cache
+ */
+ static isGeneratorLoaded(modelName, modelType) {
+ const cacheKey = `generator:${modelType}:${modelName}`;
+ return this.cache.getLoadedModules().includes(cacheKey);
+ }
+ /**
+ * Remove a generator from the cache (called when generator is cleaned up)
+ */
+ static removeGeneratorFromCache(modelName, modelType) {
+ const cacheKey = `generator:${modelType}:${modelName}`;
+ this.cache.remove(cacheKey);
+ console.log(`🧹 Removed generator from cache: ${cacheKey}`);
+ }
+ /**
+ * Get statistics about loaded generators
+ */
+ static getLoadingStats() {
+ const loadedModules = this.cache.getLoadedModules().filter(key => key.startsWith('generator:'));
+ const instructGenerators = loadedModules.filter(key => key.includes(':instruct:')).length;
+ const causalLMGenerators = loadedModules.filter(key => key.includes(':causal-lm:')).length;
+ return {
+ loadedGenerators: loadedModules,
+ totalLoaded: loadedModules.length,
+ instructGenerators,
+ causalLMGenerators
+ };
+ }
+ }
+ // =============================================================================
  // LAZY RERANKER LOADING
  // =============================================================================
  /**
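
The implementation keys every generator in the shared LazyLoadingCache as `generator:<modelType>:<modelName>`, so repeated loads of the same model are deduplicated and the bookkeeping helpers all derive from the same key. A sketch of the implied contract (import path and model name are placeholders):

```ts
// Placeholder import path and model name; illustrates the cache-key contract only.
import { LazyGeneratorLoader } from 'rag-lite-ts';

await LazyGeneratorLoader.loadInstructGenerator('my-model');    // dynamic import + loadModel()
await LazyGeneratorLoader.loadInstructGenerator('my-model');    // served from cache, no reload

LazyGeneratorLoader.isGeneratorLoaded('my-model', 'instruct');  // true  (key: generator:instruct:my-model)
LazyGeneratorLoader.isGeneratorLoaded('my-model', 'causal-lm'); // false (different key)

LazyGeneratorLoader.removeGeneratorFromCache('my-model', 'instruct');
LazyGeneratorLoader.isGeneratorLoaded('my-model', 'instruct');  // false again
```
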
@@ -332,6 +425,20 @@ export class LazyMultimodalLoader
  * Provides a single entry point for dependency management
  */
  export class LazyDependencyManager {
+ /**
+ * Load response generator based on model type with lazy loading
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ static async loadGenerator(modelName, modelType, options = {}) {
+ switch (modelType) {
+ case 'instruct':
+ return LazyGeneratorLoader.loadInstructGenerator(modelName, options);
+ case 'causal-lm':
+ return LazyGeneratorLoader.loadCausalLMGenerator(modelName, options);
+ default:
+ throw createError.validation(`Unsupported generator model type for lazy loading: ${modelType}`);
+ }
+ }
  /**
  * Load embedder based on model type with lazy loading
  */
@@ -367,19 +474,21 @@ export class LazyDependencyManager
  static getLoadingStatistics() {
  const embedderStats = LazyEmbedderLoader.getLoadingStats();
  const rerankerStats = LazyRerankerLoader.getLoadingStats();
+ const generatorStats = LazyGeneratorLoader.getLoadingStats();
  const multimodalStats = LazyMultimodalLoader.getMultimodalLoadingStatus();
- const totalModules = embedderStats.totalLoaded + rerankerStats.totalLoaded + multimodalStats.loadedProcessors.length;
+ const totalModules = embedderStats.totalLoaded + rerankerStats.totalLoaded + generatorStats.totalLoaded + multimodalStats.loadedProcessors.length;
  // Estimate memory impact based on loaded modules
  let memoryImpact = 'low';
  if (embedderStats.multimodalEmbedders > 0 || multimodalStats.imageToTextLoaded) {
  memoryImpact = 'high';
  }
- else if (totalModules > 2) {
+ else if (totalModules > 2 || generatorStats.totalLoaded > 0) {
  memoryImpact = 'medium';
  }
  return {
  embedders: embedderStats,
  rerankers: rerankerStats,
+ generators: generatorStats,
  multimodal: multimodalStats,
  totalModulesLoaded: totalModules,
  memoryImpact
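
Generators now count toward the module total, and loading any generator raises the memory estimate to at least 'medium' (a multimodal embedder or image-to-text processor still takes precedence with 'high'). A sketch of reading the aggregated report, assuming the same entry-point re-export as above:

```ts
// Hypothetical inspection of the aggregated loading statistics.
import { LazyDependencyManager } from 'rag-lite-ts';

const stats = LazyDependencyManager.getLoadingStatistics();
console.log(stats.generators.totalLoaded); // e.g. 1 after a single loadGenerator() call
console.log(stats.totalModulesLoaded);     // embedders + rerankers + generators + multimodal processors
console.log(stats.memoryImpact);           // 'medium' once any generator is loaded (unless already 'high')
```
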
package/dist/esm/core/prompt-templates.d.ts

@@ -0,0 +1,138 @@
+ /**
+ * CORE MODULE — Prompt Templates for RAG Response Generation
+ *
+ * Provides prompt engineering utilities for different generator model types.
+ * Handles context formatting, token budget management, and system prompts.
+ *
+ * PROMPT STRATEGIES:
+ * - Instruct models: Use chat template with system/user/assistant roles
+ * - Causal LM models: Use simple document + question format
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ import type { SearchResult } from './types.js';
+ import type { GeneratorModelType } from './response-generator.js';
+ /**
+ * Default system prompt for instruct models
+ * Emphasizes grounded responses using only provided context
+ */
+ export declare const DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n3. Do not make up information or use external knowledge\n4. Be concise and direct in your response\n5. If the context is incomplete or unclear, acknowledge this limitation";
+ /**
+ * Default system prompt for RAG with source attribution
+ */
+ export declare const DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. When possible, mention which document the information comes from\n3. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n4. Do not make up information or use external knowledge\n5. Be concise and direct in your response";
+ /**
+ * SmolLM2 chat template format
+ * Uses <|im_start|> and <|im_end|> tokens
+ */
+ export declare const SMOLLM2_CHAT_TEMPLATE: {
+ systemStart: string;
+ systemEnd: string;
+ userStart: string;
+ userEnd: string;
+ assistantStart: string;
+ assistantEnd: string;
+ endOfText: string;
+ };
+ /**
+ * Options for formatting context chunks
+ */
+ export interface ContextFormattingOptions {
+ /** Maximum tokens available for context */
+ maxContextTokens: number;
+ /** Include document titles/sources */
+ includeDocumentInfo?: boolean;
+ /** Include relevance scores */
+ includeScores?: boolean;
+ /** Separator between chunks */
+ chunkSeparator?: string;
+ /** Token estimation function (chars to tokens ratio) */
+ tokenEstimationRatio?: number;
+ }
+ /**
+ * Result of context formatting
+ */
+ export interface FormattedContext {
+ /** Formatted context string */
+ text: string;
+ /** Estimated token count */
+ estimatedTokens: number;
+ /** Number of chunks included */
+ chunksIncluded: number;
+ /** Total chunks available */
+ totalChunks: number;
+ /** Whether context was truncated */
+ truncated: boolean;
+ }
+ /**
+ * Format search result chunks into context string for the prompt
+ *
+ * @param chunks - Search result chunks to format
+ * @param options - Formatting options
+ * @returns Formatted context with metadata
+ */
+ export declare function formatContextChunks(chunks: SearchResult[], options: ContextFormattingOptions): FormattedContext;
+ /**
+ * Options for building the complete prompt
+ */
+ export interface PromptBuildOptions {
+ /** User's query */
+ query: string;
+ /** Search result chunks */
+ chunks: SearchResult[];
+ /** Generator model type */
+ modelType: GeneratorModelType;
+ /** Custom system prompt (optional) */
+ systemPrompt?: string;
+ /** Maximum context window tokens */
+ maxContextLength: number;
+ /** Tokens reserved for output */
+ reservedOutputTokens: number;
+ /** Include source attribution hint */
+ includeSourceAttribution?: boolean;
+ }
+ /**
+ * Result of prompt building
+ */
+ export interface BuiltPrompt {
+ /** Complete prompt string */
+ prompt: string;
+ /** Estimated total tokens */
+ estimatedTokens: number;
+ /** Context metadata */
+ contextInfo: FormattedContext;
+ /** System prompt used (if any) */
+ systemPromptUsed?: string;
+ }
+ /**
+ * Build a complete prompt for the generator model
+ *
+ * @param options - Prompt building options
+ * @returns Built prompt with metadata
+ */
+ export declare function buildPrompt(options: PromptBuildOptions): BuiltPrompt;
+ /**
+ * Estimate token count for a string
+ * Uses a simple character-based heuristic (~4 chars per token for English)
+ *
+ * @param text - Text to estimate tokens for
+ * @returns Estimated token count
+ */
+ export declare function estimateTokenCount(text: string): number;
+ /**
+ * Calculate available context budget
+ *
+ * @param maxContextLength - Maximum context window size
+ * @param reservedOutputTokens - Tokens reserved for generation
+ * @param promptOverhead - Tokens used by prompt formatting
+ * @returns Available tokens for context chunks
+ */
+ export declare function calculateContextBudget(maxContextLength: number, reservedOutputTokens: number, promptOverhead?: number): number;
+ /**
+ * Get default stop sequences for a model type
+ *
+ * @param modelType - Generator model type
+ * @returns Array of stop sequences
+ */
+ export declare function getDefaultStopSequences(modelType: GeneratorModelType): string[];
+ //# sourceMappingURL=prompt-templates.d.ts.map
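
These declarations are enough to drive the builder end to end. A minimal sketch, assuming the package entry point re-exports buildPrompt and SearchResult (the diff touches core/index.d.ts, which suggests new re-exports, but their contents are not shown here):

```ts
// Hypothetical usage — import paths and numeric values are assumptions.
import { buildPrompt } from 'rag-lite-ts';
import type { SearchResult } from 'rag-lite-ts';

declare const searchResults: SearchResult[]; // chunks from a prior search call

const built = buildPrompt({
  query: 'What ports does the service listen on?',
  chunks: searchResults,
  modelType: 'instruct',
  maxContextLength: 2048,    // model context window
  reservedOutputTokens: 256, // left free for the generated answer
  includeSourceAttribution: true
});

if (built.contextInfo.truncated) {
  console.warn(`Only ${built.contextInfo.chunksIncluded} of ${built.contextInfo.totalChunks} chunks fit the budget`);
}
```
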
package/dist/esm/core/prompt-templates.js

@@ -0,0 +1,225 @@
+ /**
+ * CORE MODULE — Prompt Templates for RAG Response Generation
+ *
+ * Provides prompt engineering utilities for different generator model types.
+ * Handles context formatting, token budget management, and system prompts.
+ *
+ * PROMPT STRATEGIES:
+ * - Instruct models: Use chat template with system/user/assistant roles
+ * - Causal LM models: Use simple document + question format
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ // =============================================================================
+ // DEFAULT PROMPTS
+ // =============================================================================
+ /**
+ * Default system prompt for instruct models
+ * Emphasizes grounded responses using only provided context
+ */
+ export const DEFAULT_SYSTEM_PROMPT = `You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:
+
+ 1. Answer ONLY using information found in the context documents
+ 2. If the answer cannot be found in the context, say "I cannot find this information in the provided documents"
+ 3. Do not make up information or use external knowledge
+ 4. Be concise and direct in your response
+ 5. If the context is incomplete or unclear, acknowledge this limitation`;
+ /**
+ * Default system prompt for RAG with source attribution
+ */
+ export const DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION = `You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:
+
+ 1. Answer ONLY using information found in the context documents
+ 2. When possible, mention which document the information comes from
+ 3. If the answer cannot be found in the context, say "I cannot find this information in the provided documents"
+ 4. Do not make up information or use external knowledge
+ 5. Be concise and direct in your response`;
+ // =============================================================================
+ // CHAT TEMPLATES
+ // =============================================================================
+ /**
+ * SmolLM2 chat template format
+ * Uses <|im_start|> and <|im_end|> tokens
+ */
+ export const SMOLLM2_CHAT_TEMPLATE = {
+ systemStart: '<|im_start|>system\n',
+ systemEnd: '<|im_end|>\n',
+ userStart: '<|im_start|>user\n',
+ userEnd: '<|im_end|>\n',
+ assistantStart: '<|im_start|>assistant\n',
+ assistantEnd: '<|im_end|>',
+ endOfText: '<|endoftext|>'
+ };
+ /**
+ * Format search result chunks into context string for the prompt
+ *
+ * @param chunks - Search result chunks to format
+ * @param options - Formatting options
+ * @returns Formatted context with metadata
+ */
+ export function formatContextChunks(chunks, options) {
+ const { maxContextTokens, includeDocumentInfo = true, includeScores = false, chunkSeparator = '\n---\n', tokenEstimationRatio = 4 // ~4 chars per token for English
+ } = options;
+ const maxChars = maxContextTokens * tokenEstimationRatio;
+ let currentChars = 0;
+ const includedChunks = [];
+ let truncated = false;
+ for (let i = 0; i < chunks.length; i++) {
+ const chunk = chunks[i];
+ // Format this chunk
+ let chunkText = '';
+ if (includeDocumentInfo) {
+ chunkText += `[Document ${i + 1}: ${chunk.document.title}]`;
+ if (includeScores) {
+ chunkText += ` (relevance: ${(chunk.score * 100).toFixed(1)}%)`;
+ }
+ chunkText += '\n';
+ }
+ chunkText += chunk.content;
+ // Check if adding this chunk would exceed budget
+ const chunkChars = chunkText.length + (includedChunks.length > 0 ? chunkSeparator.length : 0);
+ if (currentChars + chunkChars > maxChars) {
+ // Check if we can fit a truncated version of this chunk
+ const remainingChars = maxChars - currentChars - (includedChunks.length > 0 ? chunkSeparator.length : 0);
+ if (remainingChars > 100 && includedChunks.length === 0) {
+ // Truncate the first chunk if it's the only option
+ chunkText = chunkText.substring(0, remainingChars - 20) + '\n[Content truncated...]';
+ includedChunks.push(chunkText);
+ currentChars += chunkText.length;
+ }
+ truncated = true;
+ break;
+ }
+ includedChunks.push(chunkText);
+ currentChars += chunkChars;
+ }
+ const text = includedChunks.join(chunkSeparator);
+ const estimatedTokens = Math.ceil(text.length / tokenEstimationRatio);
+ return {
+ text,
+ estimatedTokens,
+ chunksIncluded: includedChunks.length,
+ totalChunks: chunks.length,
+ truncated
+ };
+ }
+ /**
+ * Build a complete prompt for the generator model
+ *
+ * @param options - Prompt building options
+ * @returns Built prompt with metadata
+ */
+ export function buildPrompt(options) {
+ const { query, chunks, modelType, systemPrompt, maxContextLength, reservedOutputTokens, includeSourceAttribution = false } = options;
+ // Calculate available tokens for context
+ const promptOverhead = modelType === 'instruct' ? 150 : 50; // Tokens for formatting
+ const queryTokens = Math.ceil(query.length / 4);
+ const availableContextTokens = maxContextLength - reservedOutputTokens - promptOverhead - queryTokens;
+ // Format context chunks
+ const contextInfo = formatContextChunks(chunks, {
+ maxContextTokens: availableContextTokens,
+ includeDocumentInfo: true,
+ includeScores: false
+ });
+ // Build prompt based on model type
+ let prompt;
+ let systemPromptUsed;
+ if (modelType === 'instruct') {
+ prompt = buildInstructPrompt(query, contextInfo.text, systemPrompt, includeSourceAttribution);
+ systemPromptUsed = systemPrompt || (includeSourceAttribution ? DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION : DEFAULT_SYSTEM_PROMPT);
+ }
+ else {
+ prompt = buildCausalLMPrompt(query, contextInfo.text);
+ }
+ const estimatedTokens = Math.ceil(prompt.length / 4);
+ return {
+ prompt,
+ estimatedTokens,
+ contextInfo,
+ systemPromptUsed
+ };
+ }
+ /**
+ * Build prompt for instruct models (SmolLM2-Instruct)
+ * Uses chat template format with system/user/assistant roles
+ */
+ function buildInstructPrompt(query, context, customSystemPrompt, includeSourceAttribution = false) {
+ const systemPrompt = customSystemPrompt ||
+ (includeSourceAttribution ? DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION : DEFAULT_SYSTEM_PROMPT);
+ const template = SMOLLM2_CHAT_TEMPLATE;
+ const userMessage = `Context:
+ ${context}
+
+ Question: ${query}
+
+ Answer based only on the context above:`;
+ return `${template.systemStart}${systemPrompt}${template.systemEnd}${template.userStart}${userMessage}${template.userEnd}${template.assistantStart}`;
+ }
+ /**
+ * Build prompt for causal LM models (DistilGPT2)
+ * Uses simple document + question format without roles
+ */
+ function buildCausalLMPrompt(query, context) {
+ return `The following documents contain information to answer the question.
+
+ Documents:
+ ${context}
+
+ Based on the documents above, answer this question: ${query}
+
+ Answer:`;
+ }
+ // =============================================================================
+ // TOKEN ESTIMATION
+ // =============================================================================
+ /**
+ * Estimate token count for a string
+ * Uses a simple character-based heuristic (~4 chars per token for English)
+ *
+ * @param text - Text to estimate tokens for
+ * @returns Estimated token count
+ */
+ export function estimateTokenCount(text) {
+ // Simple heuristic: ~4 characters per token for English text
+ // This is a rough approximation; actual tokenization varies by model
+ return Math.ceil(text.length / 4);
+ }
+ /**
+ * Calculate available context budget
+ *
+ * @param maxContextLength - Maximum context window size
+ * @param reservedOutputTokens - Tokens reserved for generation
+ * @param promptOverhead - Tokens used by prompt formatting
+ * @returns Available tokens for context chunks
+ */
+ export function calculateContextBudget(maxContextLength, reservedOutputTokens, promptOverhead = 100) {
+ return Math.max(0, maxContextLength - reservedOutputTokens - promptOverhead);
+ }
+ // =============================================================================
+ // STOP SEQUENCES
+ // =============================================================================
+ /**
+ * Get default stop sequences for a model type
+ *
+ * @param modelType - Generator model type
+ * @returns Array of stop sequences
+ */
+ export function getDefaultStopSequences(modelType) {
+ if (modelType === 'instruct') {
+ return [
+ SMOLLM2_CHAT_TEMPLATE.assistantEnd,
+ SMOLLM2_CHAT_TEMPLATE.endOfText,
+ '<|im_start|>',
+ '\n\nQuestion:',
+ '\n\nContext:'
+ ];
+ }
+ // Causal LM stop sequences
+ return [
+ '\n\nQuestion:',
+ '\n\nDocuments:',
+ '\n\n---',
+ '<|endoftext|>'
+ ];
+ }
+ //# sourceMappingURL=prompt-templates.js.map
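
The budget arithmetic in buildPrompt() is easy to check by hand. A sketch with hypothetical numbers: a 2048-token window, 256 reserved output tokens, the 150-token instruct overhead, and an 80-character query:

```ts
// Mirrors the instruct-model budget math above (all input values hypothetical).
const maxContextLength = 2048;
const reservedOutputTokens = 256;
const promptOverhead = 150;            // instruct formatting overhead used above
const queryTokens = Math.ceil(80 / 4); // 80-char query ≈ 20 tokens
const availableContextTokens =
  maxContextLength - reservedOutputTokens - promptOverhead - queryTokens; // 1622
const maxChars = availableContextTokens * 4; // ≈ 6488 chars of chunk text may be included
```
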