rag-lite-ts 2.2.0 → 2.3.1
This diff compares the published contents of the two package versions as they appear in their public registry. It is provided for informational purposes only.
- package/README.md +88 -5
- package/dist/cjs/cli/indexer.js +73 -15
- package/dist/cjs/cli/search.js +77 -2
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/cjs/cli.js +53 -7
- package/dist/cjs/core/abstract-generator.d.ts +97 -0
- package/dist/cjs/core/abstract-generator.js +222 -0
- package/dist/cjs/core/binary-index-format.js +53 -10
- package/dist/cjs/core/db.d.ts +56 -0
- package/dist/cjs/core/db.js +105 -0
- package/dist/cjs/core/generator-registry.d.ts +114 -0
- package/dist/cjs/core/generator-registry.js +280 -0
- package/dist/cjs/core/index.d.ts +4 -0
- package/dist/cjs/core/index.js +11 -0
- package/dist/cjs/core/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/cjs/core/lazy-dependency-loader.js +111 -2
- package/dist/cjs/core/prompt-templates.d.ts +138 -0
- package/dist/cjs/core/prompt-templates.js +225 -0
- package/dist/cjs/core/response-generator.d.ts +132 -0
- package/dist/cjs/core/response-generator.js +69 -0
- package/dist/cjs/core/search-pipeline.js +1 -1
- package/dist/cjs/core/search.d.ts +72 -1
- package/dist/cjs/core/search.js +80 -7
- package/dist/cjs/core/types.d.ts +1 -0
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +314 -0
- package/dist/cjs/core/vector-index.d.ts +45 -10
- package/dist/cjs/core/vector-index.js +279 -218
- package/dist/cjs/factories/generator-factory.d.ts +88 -0
- package/dist/cjs/factories/generator-factory.js +151 -0
- package/dist/cjs/factories/index.d.ts +1 -0
- package/dist/cjs/factories/index.js +5 -0
- package/dist/cjs/factories/ingestion-factory.js +3 -7
- package/dist/cjs/factories/search-factory.js +11 -0
- package/dist/cjs/index-manager.d.ts +23 -3
- package/dist/cjs/index-manager.js +84 -15
- package/dist/cjs/index.d.ts +11 -1
- package/dist/cjs/index.js +19 -1
- package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
- package/dist/cjs/text/generators/index.d.ts +10 -0
- package/dist/cjs/text/generators/index.js +10 -0
- package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
- package/dist/cjs/text/generators/instruct-generator.js +192 -0
- package/dist/esm/cli/indexer.js +73 -15
- package/dist/esm/cli/search.js +77 -2
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.js +53 -7
- package/dist/esm/core/abstract-generator.d.ts +97 -0
- package/dist/esm/core/abstract-generator.js +222 -0
- package/dist/esm/core/binary-index-format.js +53 -10
- package/dist/esm/core/db.d.ts +56 -0
- package/dist/esm/core/db.js +105 -0
- package/dist/esm/core/generator-registry.d.ts +114 -0
- package/dist/esm/core/generator-registry.js +280 -0
- package/dist/esm/core/index.d.ts +4 -0
- package/dist/esm/core/index.js +11 -0
- package/dist/esm/core/ingestion.js +3 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/esm/core/lazy-dependency-loader.js +111 -2
- package/dist/esm/core/prompt-templates.d.ts +138 -0
- package/dist/esm/core/prompt-templates.js +225 -0
- package/dist/esm/core/response-generator.d.ts +132 -0
- package/dist/esm/core/response-generator.js +69 -0
- package/dist/esm/core/search-pipeline.js +1 -1
- package/dist/esm/core/search.d.ts +72 -1
- package/dist/esm/core/search.js +80 -7
- package/dist/esm/core/types.d.ts +1 -0
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +314 -0
- package/dist/esm/core/vector-index.d.ts +45 -10
- package/dist/esm/core/vector-index.js +279 -218
- package/dist/esm/factories/generator-factory.d.ts +88 -0
- package/dist/esm/factories/generator-factory.js +151 -0
- package/dist/esm/factories/index.d.ts +1 -0
- package/dist/esm/factories/index.js +5 -0
- package/dist/esm/factories/ingestion-factory.js +3 -7
- package/dist/esm/factories/search-factory.js +11 -0
- package/dist/esm/index-manager.d.ts +23 -3
- package/dist/esm/index-manager.js +84 -15
- package/dist/esm/index.d.ts +11 -1
- package/dist/esm/index.js +19 -1
- package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/esm/text/generators/causal-lm-generator.js +197 -0
- package/dist/esm/text/generators/index.d.ts +10 -0
- package/dist/esm/text/generators/index.js +10 -0
- package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
- package/dist/esm/text/generators/instruct-generator.js +192 -0
- package/package.json +14 -7
package/dist/esm/core/lazy-dependency-loader.d.ts

@@ -8,6 +8,7 @@
 import '../dom-polyfills.js';
 import type { UniversalEmbedder } from './universal-embedder.js';
 import type { RerankFunction } from './interfaces.js';
+import type { ResponseGenerator } from './response-generator.js';
 /**
  * Lazy loader for embedder implementations
  * Only loads the specific embedder type when needed
@@ -42,6 +43,42 @@ export declare class LazyEmbedderLoader {
         multimodalEmbedders: number;
     };
 }
+/**
+ * Lazy loader for response generator implementations
+ * Only loads the specific generator type when needed
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+export declare class LazyGeneratorLoader {
+    private static cache;
+    /**
+     * Lazily load and create an instruct generator (SmolLM2-Instruct)
+     * Only imports the module when generation is actually requested
+     */
+    static loadInstructGenerator(modelName: string, options?: any): Promise<ResponseGenerator>;
+    /**
+     * Lazily load and create a causal LM generator (DistilGPT2)
+     * Only imports the module when generation is actually requested
+     */
+    static loadCausalLMGenerator(modelName: string, options?: any): Promise<ResponseGenerator>;
+    /**
+     * Check if a generator is already loaded in cache
+     */
+    static isGeneratorLoaded(modelName: string, modelType: 'instruct' | 'causal-lm'): boolean;
+    /**
+     * Remove a generator from the cache (called when generator is cleaned up)
+     */
+    static removeGeneratorFromCache(modelName: string, modelType: 'instruct' | 'causal-lm'): void;
+    /**
+     * Get statistics about loaded generators
+     */
+    static getLoadingStats(): {
+        loadedGenerators: string[];
+        totalLoaded: number;
+        instructGenerators: number;
+        causalLMGenerators: number;
+    };
+}
 /**
  * Lazy loader for reranking implementations
  * Only loads the specific reranker type when needed
@@ -107,6 +144,11 @@ export declare class LazyMultimodalLoader
  * Provides a single entry point for dependency management
  */
 export declare class LazyDependencyManager {
+    /**
+     * Load response generator based on model type with lazy loading
+     * @experimental This feature is experimental and may change in future versions.
+     */
+    static loadGenerator(modelName: string, modelType: 'instruct' | 'causal-lm', options?: any): Promise<ResponseGenerator>;
     /**
      * Load embedder based on model type with lazy loading
      */
@@ -121,6 +163,7 @@ export declare class LazyDependencyManager {
     static getLoadingStatistics(): {
         embedders: ReturnType<typeof LazyEmbedderLoader.getLoadingStats>;
         rerankers: ReturnType<typeof LazyRerankerLoader.getLoadingStats>;
+        generators: ReturnType<typeof LazyGeneratorLoader.getLoadingStats>;
         multimodal: ReturnType<typeof LazyMultimodalLoader.getMultimodalLoadingStatus>;
         totalModulesLoaded: number;
         memoryImpact: 'low' | 'medium' | 'high';
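
Taken together, these declarations define the lazy-loading surface of the new experimental generation feature. A minimal usage sketch against them, from inside the package (the model name is illustrative, not a shipped default; the import path is the module being diffed):

```ts
// Sketch only: exercises the declarations above; assumes an ESM context.
import { LazyDependencyManager, LazyGeneratorLoader } from './core/lazy-dependency-loader.js';

async function demo() {
  // The first call dynamically imports the generator module and loads the
  // model; later calls for the same model/type resolve from the cache.
  const generator = await LazyDependencyManager.loadGenerator(
    'HuggingFaceTB/SmolLM2-360M-Instruct', // illustrative model name
    'instruct'
  );
  console.log(LazyGeneratorLoader.isGeneratorLoaded('HuggingFaceTB/SmolLM2-360M-Instruct', 'instruct')); // true
  return generator;
}

demo().catch(console.error);
```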
package/dist/esm/core/lazy-dependency-loader.js

@@ -149,6 +149,99 @@ export class LazyEmbedderLoader {
     }
 }
 // =============================================================================
+// LAZY GENERATOR LOADING
+// =============================================================================
+/**
+ * Lazy loader for response generator implementations
+ * Only loads the specific generator type when needed
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+export class LazyGeneratorLoader {
+    static cache = LazyLoadingCache.getInstance();
+    /**
+     * Lazily load and create an instruct generator (SmolLM2-Instruct)
+     * Only imports the module when generation is actually requested
+     */
+    static async loadInstructGenerator(modelName, options = {}) {
+        const cacheKey = `generator:instruct:${modelName}`;
+        return this.cache.getOrLoad(cacheKey, async () => {
+            try {
+                console.log(`🔄 [EXPERIMENTAL] Lazy loading instruct generator: ${modelName}`);
+                // Dynamic import - only loaded when generation is requested
+                const { InstructGenerator } = await import('../text/generators/instruct-generator.js');
+                const generator = new InstructGenerator(modelName, options);
+                await generator.loadModel();
+                console.log(`✅ Instruct generator loaded: ${modelName}`);
+                return generator;
+            }
+            catch (error) {
+                const enhancedError = createError.model(`Failed to lazy load instruct generator '${modelName}': ${error instanceof Error ? error.message : 'Unknown error'}`);
+                handleError(enhancedError, 'LazyGeneratorLoader', {
+                    severity: ErrorSeverity.ERROR,
+                    category: ErrorCategory.MODEL
+                });
+                throw enhancedError;
+            }
+        });
+    }
+    /**
+     * Lazily load and create a causal LM generator (DistilGPT2)
+     * Only imports the module when generation is actually requested
+     */
+    static async loadCausalLMGenerator(modelName, options = {}) {
+        const cacheKey = `generator:causal-lm:${modelName}`;
+        return this.cache.getOrLoad(cacheKey, async () => {
+            try {
+                console.log(`🔄 [EXPERIMENTAL] Lazy loading causal LM generator: ${modelName}`);
+                // Dynamic import - only loaded when generation is requested
+                const { CausalLMGenerator } = await import('../text/generators/causal-lm-generator.js');
+                const generator = new CausalLMGenerator(modelName, options);
+                await generator.loadModel();
+                console.log(`✅ Causal LM generator loaded: ${modelName}`);
+                return generator;
+            }
+            catch (error) {
+                const enhancedError = createError.model(`Failed to lazy load causal LM generator '${modelName}': ${error instanceof Error ? error.message : 'Unknown error'}`);
+                handleError(enhancedError, 'LazyGeneratorLoader', {
+                    severity: ErrorSeverity.ERROR,
+                    category: ErrorCategory.MODEL
+                });
+                throw enhancedError;
+            }
+        });
+    }
+    /**
+     * Check if a generator is already loaded in cache
+     */
+    static isGeneratorLoaded(modelName, modelType) {
+        const cacheKey = `generator:${modelType}:${modelName}`;
+        return this.cache.getLoadedModules().includes(cacheKey);
+    }
+    /**
+     * Remove a generator from the cache (called when generator is cleaned up)
+     */
+    static removeGeneratorFromCache(modelName, modelType) {
+        const cacheKey = `generator:${modelType}:${modelName}`;
+        this.cache.remove(cacheKey);
+        console.log(`🧹 Removed generator from cache: ${cacheKey}`);
+    }
+    /**
+     * Get statistics about loaded generators
+     */
+    static getLoadingStats() {
+        const loadedModules = this.cache.getLoadedModules().filter(key => key.startsWith('generator:'));
+        const instructGenerators = loadedModules.filter(key => key.includes(':instruct:')).length;
+        const causalLMGenerators = loadedModules.filter(key => key.includes(':causal-lm:')).length;
+        return {
+            loadedGenerators: loadedModules,
+            totalLoaded: loadedModules.length,
+            instructGenerators,
+            causalLMGenerators
+        };
+    }
+}
+// =============================================================================
 // LAZY RERANKER LOADING
 // =============================================================================
 /**
@@ -332,6 +425,20 @@ export class LazyMultimodalLoader {
  * Provides a single entry point for dependency management
  */
 export class LazyDependencyManager {
+    /**
+     * Load response generator based on model type with lazy loading
+     * @experimental This feature is experimental and may change in future versions.
+     */
+    static async loadGenerator(modelName, modelType, options = {}) {
+        switch (modelType) {
+            case 'instruct':
+                return LazyGeneratorLoader.loadInstructGenerator(modelName, options);
+            case 'causal-lm':
+                return LazyGeneratorLoader.loadCausalLMGenerator(modelName, options);
+            default:
+                throw createError.validation(`Unsupported generator model type for lazy loading: ${modelType}`);
+        }
+    }
     /**
      * Load embedder based on model type with lazy loading
      */
@@ -367,19 +474,21 @@ export class LazyDependencyManager {
     static getLoadingStatistics() {
         const embedderStats = LazyEmbedderLoader.getLoadingStats();
         const rerankerStats = LazyRerankerLoader.getLoadingStats();
+        const generatorStats = LazyGeneratorLoader.getLoadingStats();
         const multimodalStats = LazyMultimodalLoader.getMultimodalLoadingStatus();
-        const totalModules = embedderStats.totalLoaded + rerankerStats.totalLoaded + multimodalStats.loadedProcessors.length;
+        const totalModules = embedderStats.totalLoaded + rerankerStats.totalLoaded + generatorStats.totalLoaded + multimodalStats.loadedProcessors.length;
         // Estimate memory impact based on loaded modules
         let memoryImpact = 'low';
         if (embedderStats.multimodalEmbedders > 0 || multimodalStats.imageToTextLoaded) {
             memoryImpact = 'high';
         }
-        else if (totalModules > 2) {
+        else if (totalModules > 2 || generatorStats.totalLoaded > 0) {
             memoryImpact = 'medium';
         }
         return {
             embedders: embedderStats,
             rerankers: rerankerStats,
+            generators: generatorStats,
             multimodal: multimodalStats,
             totalModulesLoaded: totalModules,
             memoryImpact
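
One consequence of the changed heuristic worth noting: a single loaded generator now pushes memoryImpact to 'medium' even though it counts as only one module. A quick sketch against the code above (import path is the module being diffed):

```ts
import { LazyDependencyManager } from './core/lazy-dependency-loader.js';

const stats = LazyDependencyManager.getLoadingStatistics();
// With exactly one generator loaded and no embedders, rerankers, or
// multimodal processors, totalModulesLoaded is 1, yet memoryImpact is
// 'medium' because the new `generatorStats.totalLoaded > 0` branch fires.
console.log(stats.generators.totalLoaded, stats.totalModulesLoaded, stats.memoryImpact);
```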
package/dist/esm/core/prompt-templates.d.ts

@@ -0,0 +1,138 @@
+/**
+ * CORE MODULE — Prompt Templates for RAG Response Generation
+ *
+ * Provides prompt engineering utilities for different generator model types.
+ * Handles context formatting, token budget management, and system prompts.
+ *
+ * PROMPT STRATEGIES:
+ * - Instruct models: Use chat template with system/user/assistant roles
+ * - Causal LM models: Use simple document + question format
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+import type { SearchResult } from './types.js';
+import type { GeneratorModelType } from './response-generator.js';
+/**
+ * Default system prompt for instruct models
+ * Emphasizes grounded responses using only provided context
+ */
+export declare const DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n3. Do not make up information or use external knowledge\n4. Be concise and direct in your response\n5. If the context is incomplete or unclear, acknowledge this limitation";
+/**
+ * Default system prompt for RAG with source attribution
+ */
+export declare const DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. When possible, mention which document the information comes from\n3. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n4. Do not make up information or use external knowledge\n5. Be concise and direct in your response";
+/**
+ * SmolLM2 chat template format
+ * Uses <|im_start|> and <|im_end|> tokens
+ */
+export declare const SMOLLM2_CHAT_TEMPLATE: {
+    systemStart: string;
+    systemEnd: string;
+    userStart: string;
+    userEnd: string;
+    assistantStart: string;
+    assistantEnd: string;
+    endOfText: string;
+};
+/**
+ * Options for formatting context chunks
+ */
+export interface ContextFormattingOptions {
+    /** Maximum tokens available for context */
+    maxContextTokens: number;
+    /** Include document titles/sources */
+    includeDocumentInfo?: boolean;
+    /** Include relevance scores */
+    includeScores?: boolean;
+    /** Separator between chunks */
+    chunkSeparator?: string;
+    /** Token estimation function (chars to tokens ratio) */
+    tokenEstimationRatio?: number;
+}
+/**
+ * Result of context formatting
+ */
+export interface FormattedContext {
+    /** Formatted context string */
+    text: string;
+    /** Estimated token count */
+    estimatedTokens: number;
+    /** Number of chunks included */
+    chunksIncluded: number;
+    /** Total chunks available */
+    totalChunks: number;
+    /** Whether context was truncated */
+    truncated: boolean;
+}
+/**
+ * Format search result chunks into context string for the prompt
+ *
+ * @param chunks - Search result chunks to format
+ * @param options - Formatting options
+ * @returns Formatted context with metadata
+ */
+export declare function formatContextChunks(chunks: SearchResult[], options: ContextFormattingOptions): FormattedContext;
+/**
+ * Options for building the complete prompt
+ */
+export interface PromptBuildOptions {
+    /** User's query */
+    query: string;
+    /** Search result chunks */
+    chunks: SearchResult[];
+    /** Generator model type */
+    modelType: GeneratorModelType;
+    /** Custom system prompt (optional) */
+    systemPrompt?: string;
+    /** Maximum context window tokens */
+    maxContextLength: number;
+    /** Tokens reserved for output */
+    reservedOutputTokens: number;
+    /** Include source attribution hint */
+    includeSourceAttribution?: boolean;
+}
+/**
+ * Result of prompt building
+ */
+export interface BuiltPrompt {
+    /** Complete prompt string */
+    prompt: string;
+    /** Estimated total tokens */
+    estimatedTokens: number;
+    /** Context metadata */
+    contextInfo: FormattedContext;
+    /** System prompt used (if any) */
+    systemPromptUsed?: string;
+}
+/**
+ * Build a complete prompt for the generator model
+ *
+ * @param options - Prompt building options
+ * @returns Built prompt with metadata
+ */
+export declare function buildPrompt(options: PromptBuildOptions): BuiltPrompt;
+/**
+ * Estimate token count for a string
+ * Uses a simple character-based heuristic (~4 chars per token for English)
+ *
+ * @param text - Text to estimate tokens for
+ * @returns Estimated token count
+ */
+export declare function estimateTokenCount(text: string): number;
+/**
+ * Calculate available context budget
+ *
+ * @param maxContextLength - Maximum context window size
+ * @param reservedOutputTokens - Tokens reserved for generation
+ * @param promptOverhead - Tokens used by prompt formatting
+ * @returns Available tokens for context chunks
+ */
+export declare function calculateContextBudget(maxContextLength: number, reservedOutputTokens: number, promptOverhead?: number): number;
+/**
+ * Get default stop sequences for a model type
+ *
+ * @param modelType - Generator model type
+ * @returns Array of stop sequences
+ */
+export declare function getDefaultStopSequences(modelType: GeneratorModelType): string[];
+//# sourceMappingURL=prompt-templates.d.ts.map
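
The implementation behind these declarations appears in the prompt-templates.js hunk below. As a quick orientation, a hedged sketch of calling buildPrompt: the SearchResult literal is trimmed to the fields the formatter actually reads (content, score, document.title), hence the cast, since the real type likely carries more required fields.

```ts
import { buildPrompt } from './core/prompt-templates.js';
import type { SearchResult } from './core/types.js';

const chunks = [
  { content: 'The vector index is persisted in a binary format.', score: 0.82, document: { title: 'storage.md' } },
] as unknown as SearchResult[]; // shape trimmed for illustration

const built = buildPrompt({
  query: 'How is the index stored?',
  chunks,
  modelType: 'instruct',      // selects the SmolLM2 chat-template path
  maxContextLength: 2048,     // model context window, illustrative
  reservedOutputTokens: 256,  // kept back for the generated answer
});
console.log(built.estimatedTokens, built.contextInfo.chunksIncluded, built.contextInfo.truncated);
```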
package/dist/esm/core/prompt-templates.js

@@ -0,0 +1,225 @@
+/**
+ * CORE MODULE — Prompt Templates for RAG Response Generation
+ *
+ * Provides prompt engineering utilities for different generator model types.
+ * Handles context formatting, token budget management, and system prompts.
+ *
+ * PROMPT STRATEGIES:
+ * - Instruct models: Use chat template with system/user/assistant roles
+ * - Causal LM models: Use simple document + question format
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+// =============================================================================
+// DEFAULT PROMPTS
+// =============================================================================
+/**
+ * Default system prompt for instruct models
+ * Emphasizes grounded responses using only provided context
+ */
+export const DEFAULT_SYSTEM_PROMPT = `You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:
+
+1. Answer ONLY using information found in the context documents
+2. If the answer cannot be found in the context, say "I cannot find this information in the provided documents"
+3. Do not make up information or use external knowledge
+4. Be concise and direct in your response
+5. If the context is incomplete or unclear, acknowledge this limitation`;
+/**
+ * Default system prompt for RAG with source attribution
+ */
+export const DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION = `You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:
+
+1. Answer ONLY using information found in the context documents
+2. When possible, mention which document the information comes from
+3. If the answer cannot be found in the context, say "I cannot find this information in the provided documents"
+4. Do not make up information or use external knowledge
+5. Be concise and direct in your response`;
+// =============================================================================
+// CHAT TEMPLATES
+// =============================================================================
+/**
+ * SmolLM2 chat template format
+ * Uses <|im_start|> and <|im_end|> tokens
+ */
+export const SMOLLM2_CHAT_TEMPLATE = {
+    systemStart: '<|im_start|>system\n',
+    systemEnd: '<|im_end|>\n',
+    userStart: '<|im_start|>user\n',
+    userEnd: '<|im_end|>\n',
+    assistantStart: '<|im_start|>assistant\n',
+    assistantEnd: '<|im_end|>',
+    endOfText: '<|endoftext|>'
+};
+/**
+ * Format search result chunks into context string for the prompt
+ *
+ * @param chunks - Search result chunks to format
+ * @param options - Formatting options
+ * @returns Formatted context with metadata
+ */
+export function formatContextChunks(chunks, options) {
+    const { maxContextTokens, includeDocumentInfo = true, includeScores = false, chunkSeparator = '\n---\n', tokenEstimationRatio = 4 // ~4 chars per token for English
+     } = options;
+    const maxChars = maxContextTokens * tokenEstimationRatio;
+    let currentChars = 0;
+    const includedChunks = [];
+    let truncated = false;
+    for (let i = 0; i < chunks.length; i++) {
+        const chunk = chunks[i];
+        // Format this chunk
+        let chunkText = '';
+        if (includeDocumentInfo) {
+            chunkText += `[Document ${i + 1}: ${chunk.document.title}]`;
+            if (includeScores) {
+                chunkText += ` (relevance: ${(chunk.score * 100).toFixed(1)}%)`;
+            }
+            chunkText += '\n';
+        }
+        chunkText += chunk.content;
+        // Check if adding this chunk would exceed budget
+        const chunkChars = chunkText.length + (includedChunks.length > 0 ? chunkSeparator.length : 0);
+        if (currentChars + chunkChars > maxChars) {
+            // Check if we can fit a truncated version of this chunk
+            const remainingChars = maxChars - currentChars - (includedChunks.length > 0 ? chunkSeparator.length : 0);
+            if (remainingChars > 100 && includedChunks.length === 0) {
+                // Truncate the first chunk if it's the only option
+                chunkText = chunkText.substring(0, remainingChars - 20) + '\n[Content truncated...]';
+                includedChunks.push(chunkText);
+                currentChars += chunkText.length;
+            }
+            truncated = true;
+            break;
+        }
+        includedChunks.push(chunkText);
+        currentChars += chunkChars;
+    }
+    const text = includedChunks.join(chunkSeparator);
+    const estimatedTokens = Math.ceil(text.length / tokenEstimationRatio);
+    return {
+        text,
+        estimatedTokens,
+        chunksIncluded: includedChunks.length,
+        totalChunks: chunks.length,
+        truncated
+    };
+}
+/**
+ * Build a complete prompt for the generator model
+ *
+ * @param options - Prompt building options
+ * @returns Built prompt with metadata
+ */
+export function buildPrompt(options) {
+    const { query, chunks, modelType, systemPrompt, maxContextLength, reservedOutputTokens, includeSourceAttribution = false } = options;
+    // Calculate available tokens for context
+    const promptOverhead = modelType === 'instruct' ? 150 : 50; // Tokens for formatting
+    const queryTokens = Math.ceil(query.length / 4);
+    const availableContextTokens = maxContextLength - reservedOutputTokens - promptOverhead - queryTokens;
+    // Format context chunks
+    const contextInfo = formatContextChunks(chunks, {
+        maxContextTokens: availableContextTokens,
+        includeDocumentInfo: true,
+        includeScores: false
+    });
+    // Build prompt based on model type
+    let prompt;
+    let systemPromptUsed;
+    if (modelType === 'instruct') {
+        prompt = buildInstructPrompt(query, contextInfo.text, systemPrompt, includeSourceAttribution);
+        systemPromptUsed = systemPrompt || (includeSourceAttribution ? DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION : DEFAULT_SYSTEM_PROMPT);
+    }
+    else {
+        prompt = buildCausalLMPrompt(query, contextInfo.text);
+    }
+    const estimatedTokens = Math.ceil(prompt.length / 4);
+    return {
+        prompt,
+        estimatedTokens,
+        contextInfo,
+        systemPromptUsed
+    };
+}
+/**
+ * Build prompt for instruct models (SmolLM2-Instruct)
+ * Uses chat template format with system/user/assistant roles
+ */
+function buildInstructPrompt(query, context, customSystemPrompt, includeSourceAttribution = false) {
+    const systemPrompt = customSystemPrompt ||
+        (includeSourceAttribution ? DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION : DEFAULT_SYSTEM_PROMPT);
+    const template = SMOLLM2_CHAT_TEMPLATE;
+    const userMessage = `Context:
+${context}
+
+Question: ${query}
+
+Answer based only on the context above:`;
+    return `${template.systemStart}${systemPrompt}${template.systemEnd}${template.userStart}${userMessage}${template.userEnd}${template.assistantStart}`;
+}
+/**
+ * Build prompt for causal LM models (DistilGPT2)
+ * Uses simple document + question format without roles
+ */
+function buildCausalLMPrompt(query, context) {
+    return `The following documents contain information to answer the question.
+
+Documents:
+${context}
+
+Based on the documents above, answer this question: ${query}
+
+Answer:`;
+}
+// =============================================================================
+// TOKEN ESTIMATION
+// =============================================================================
+/**
+ * Estimate token count for a string
+ * Uses a simple character-based heuristic (~4 chars per token for English)
+ *
+ * @param text - Text to estimate tokens for
+ * @returns Estimated token count
+ */
+export function estimateTokenCount(text) {
+    // Simple heuristic: ~4 characters per token for English text
+    // This is a rough approximation; actual tokenization varies by model
+    return Math.ceil(text.length / 4);
+}
+/**
+ * Calculate available context budget
+ *
+ * @param maxContextLength - Maximum context window size
+ * @param reservedOutputTokens - Tokens reserved for generation
+ * @param promptOverhead - Tokens used by prompt formatting
+ * @returns Available tokens for context chunks
+ */
+export function calculateContextBudget(maxContextLength, reservedOutputTokens, promptOverhead = 100) {
+    return Math.max(0, maxContextLength - reservedOutputTokens - promptOverhead);
+}
+// =============================================================================
+// STOP SEQUENCES
+// =============================================================================
+/**
+ * Get default stop sequences for a model type
+ *
+ * @param modelType - Generator model type
+ * @returns Array of stop sequences
+ */
+export function getDefaultStopSequences(modelType) {
+    if (modelType === 'instruct') {
+        return [
+            SMOLLM2_CHAT_TEMPLATE.assistantEnd,
+            SMOLLM2_CHAT_TEMPLATE.endOfText,
+            '<|im_start|>',
+            '\n\nQuestion:',
+            '\n\nContext:'
+        ];
+    }
+    // Causal LM stop sequences
+    return [
+        '\n\nQuestion:',
+        '\n\nDocuments:',
+        '\n\n---',
+        '<|endoftext|>'
+    ];
+}
+//# sourceMappingURL=prompt-templates.js.map
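
The token budgeting above is simple enough to verify by hand. Under the default 100-token formatting overhead, a 2048-token window with 256 tokens reserved for output leaves 2048 - 256 - 100 = 1692 tokens for context, roughly 6768 characters at the 4-chars-per-token heuristic:

```ts
import { calculateContextBudget, estimateTokenCount, getDefaultStopSequences } from './core/prompt-templates.js';

console.log(calculateContextBudget(2048, 256));    // 1692 (= 2048 - 256 - 100)
console.log(estimateTokenCount('a'.repeat(6768))); // 1692 (ceil(6768 / 4))
console.log(getDefaultStopSequences('causal-lm')); // ['\n\nQuestion:', '\n\nDocuments:', '\n\n---', '<|endoftext|>']
```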