rag-lite-ts 2.3.0 → 2.3.1
This diff compares publicly available package versions as published to their public registries. It is provided for informational purposes only.
- package/dist/cjs/cli/search.js +77 -2
- package/dist/cjs/cli.js +28 -1
- package/dist/cjs/core/abstract-generator.d.ts +97 -0
- package/dist/cjs/core/abstract-generator.js +222 -0
- package/dist/cjs/core/binary-index-format.js +47 -7
- package/dist/cjs/core/generator-registry.d.ts +114 -0
- package/dist/cjs/core/generator-registry.js +280 -0
- package/dist/cjs/core/index.d.ts +4 -0
- package/dist/cjs/core/index.js +11 -0
- package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/cjs/core/lazy-dependency-loader.js +111 -2
- package/dist/cjs/core/prompt-templates.d.ts +138 -0
- package/dist/cjs/core/prompt-templates.js +225 -0
- package/dist/cjs/core/response-generator.d.ts +132 -0
- package/dist/cjs/core/response-generator.js +69 -0
- package/dist/cjs/core/search.d.ts +72 -1
- package/dist/cjs/core/search.js +79 -6
- package/dist/cjs/core/types.d.ts +1 -0
- package/dist/cjs/core/vector-index-worker.js +10 -0
- package/dist/cjs/core/vector-index.js +69 -19
- package/dist/cjs/factories/generator-factory.d.ts +88 -0
- package/dist/cjs/factories/generator-factory.js +151 -0
- package/dist/cjs/factories/index.d.ts +1 -0
- package/dist/cjs/factories/index.js +5 -0
- package/dist/cjs/index.d.ts +9 -0
- package/dist/cjs/index.js +16 -0
- package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
- package/dist/cjs/text/generators/index.d.ts +10 -0
- package/dist/cjs/text/generators/index.js +10 -0
- package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
- package/dist/cjs/text/generators/instruct-generator.js +192 -0
- package/dist/esm/cli/search.js +77 -2
- package/dist/esm/cli.js +28 -1
- package/dist/esm/core/abstract-generator.d.ts +97 -0
- package/dist/esm/core/abstract-generator.js +222 -0
- package/dist/esm/core/binary-index-format.js +47 -7
- package/dist/esm/core/generator-registry.d.ts +114 -0
- package/dist/esm/core/generator-registry.js +280 -0
- package/dist/esm/core/index.d.ts +4 -0
- package/dist/esm/core/index.js +11 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/esm/core/lazy-dependency-loader.js +111 -2
- package/dist/esm/core/prompt-templates.d.ts +138 -0
- package/dist/esm/core/prompt-templates.js +225 -0
- package/dist/esm/core/response-generator.d.ts +132 -0
- package/dist/esm/core/response-generator.js +69 -0
- package/dist/esm/core/search.d.ts +72 -1
- package/dist/esm/core/search.js +79 -6
- package/dist/esm/core/types.d.ts +1 -0
- package/dist/esm/core/vector-index-worker.js +10 -0
- package/dist/esm/core/vector-index.js +69 -19
- package/dist/esm/factories/generator-factory.d.ts +88 -0
- package/dist/esm/factories/generator-factory.js +151 -0
- package/dist/esm/factories/index.d.ts +1 -0
- package/dist/esm/factories/index.js +5 -0
- package/dist/esm/index.d.ts +9 -0
- package/dist/esm/index.js +16 -0
- package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/esm/text/generators/causal-lm-generator.js +197 -0
- package/dist/esm/text/generators/index.d.ts +10 -0
- package/dist/esm/text/generators/index.js +10 -0
- package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
- package/dist/esm/text/generators/instruct-generator.js +192 -0
- package/package.json +1 -1
core/prompt-templates.d.ts (new file):

@@ -0,0 +1,138 @@
+/**
+ * CORE MODULE — Prompt Templates for RAG Response Generation
+ *
+ * Provides prompt engineering utilities for different generator model types.
+ * Handles context formatting, token budget management, and system prompts.
+ *
+ * PROMPT STRATEGIES:
+ * - Instruct models: Use chat template with system/user/assistant roles
+ * - Causal LM models: Use simple document + question format
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+import type { SearchResult } from './types.js';
+import type { GeneratorModelType } from './response-generator.js';
+/**
+ * Default system prompt for instruct models
+ * Emphasizes grounded responses using only provided context
+ */
+export declare const DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n3. Do not make up information or use external knowledge\n4. Be concise and direct in your response\n5. If the context is incomplete or unclear, acknowledge this limitation";
+/**
+ * Default system prompt for RAG with source attribution
+ */
+export declare const DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. When possible, mention which document the information comes from\n3. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n4. Do not make up information or use external knowledge\n5. Be concise and direct in your response";
+/**
+ * SmolLM2 chat template format
+ * Uses <|im_start|> and <|im_end|> tokens
+ */
+export declare const SMOLLM2_CHAT_TEMPLATE: {
+    systemStart: string;
+    systemEnd: string;
+    userStart: string;
+    userEnd: string;
+    assistantStart: string;
+    assistantEnd: string;
+    endOfText: string;
+};
+/**
+ * Options for formatting context chunks
+ */
+export interface ContextFormattingOptions {
+    /** Maximum tokens available for context */
+    maxContextTokens: number;
+    /** Include document titles/sources */
+    includeDocumentInfo?: boolean;
+    /** Include relevance scores */
+    includeScores?: boolean;
+    /** Separator between chunks */
+    chunkSeparator?: string;
+    /** Token estimation function (chars to tokens ratio) */
+    tokenEstimationRatio?: number;
+}
+/**
+ * Result of context formatting
+ */
+export interface FormattedContext {
+    /** Formatted context string */
+    text: string;
+    /** Estimated token count */
+    estimatedTokens: number;
+    /** Number of chunks included */
+    chunksIncluded: number;
+    /** Total chunks available */
+    totalChunks: number;
+    /** Whether context was truncated */
+    truncated: boolean;
+}
+/**
+ * Format search result chunks into context string for the prompt
+ *
+ * @param chunks - Search result chunks to format
+ * @param options - Formatting options
+ * @returns Formatted context with metadata
+ */
+export declare function formatContextChunks(chunks: SearchResult[], options: ContextFormattingOptions): FormattedContext;
+/**
+ * Options for building the complete prompt
+ */
+export interface PromptBuildOptions {
+    /** User's query */
+    query: string;
+    /** Search result chunks */
+    chunks: SearchResult[];
+    /** Generator model type */
+    modelType: GeneratorModelType;
+    /** Custom system prompt (optional) */
+    systemPrompt?: string;
+    /** Maximum context window tokens */
+    maxContextLength: number;
+    /** Tokens reserved for output */
+    reservedOutputTokens: number;
+    /** Include source attribution hint */
+    includeSourceAttribution?: boolean;
+}
+/**
+ * Result of prompt building
+ */
+export interface BuiltPrompt {
+    /** Complete prompt string */
+    prompt: string;
+    /** Estimated total tokens */
+    estimatedTokens: number;
+    /** Context metadata */
+    contextInfo: FormattedContext;
+    /** System prompt used (if any) */
+    systemPromptUsed?: string;
+}
+/**
+ * Build a complete prompt for the generator model
+ *
+ * @param options - Prompt building options
+ * @returns Built prompt with metadata
+ */
+export declare function buildPrompt(options: PromptBuildOptions): BuiltPrompt;
+/**
+ * Estimate token count for a string
+ * Uses a simple character-based heuristic (~4 chars per token for English)
+ *
+ * @param text - Text to estimate tokens for
+ * @returns Estimated token count
+ */
+export declare function estimateTokenCount(text: string): number;
+/**
+ * Calculate available context budget
+ *
+ * @param maxContextLength - Maximum context window size
+ * @param reservedOutputTokens - Tokens reserved for generation
+ * @param promptOverhead - Tokens used by prompt formatting
+ * @returns Available tokens for context chunks
+ */
+export declare function calculateContextBudget(maxContextLength: number, reservedOutputTokens: number, promptOverhead?: number): number;
+/**
+ * Get default stop sequences for a model type
+ *
+ * @param modelType - Generator model type
+ * @returns Array of stop sequences
+ */
+export declare function getDefaultStopSequences(modelType: GeneratorModelType): string[];
+//# sourceMappingURL=prompt-templates.d.ts.map
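The declarations above split prompt assembly into two steps: compute a token budget, then let buildPrompt pack as many chunks as fit. A minimal usage sketch; the root import path and the 2048-token window are assumptions for illustration, and the chunk literal only mimics the SearchResult shape:

```typescript
// Sketch only: assumes these symbols are re-exported from the package root.
import { buildPrompt, calculateContextBudget } from 'rag-lite-ts';
import type { SearchResult } from 'rag-lite-ts';

// Hypothetical chunk standing in for a real search result.
const chunks = [{
  document: { title: 'auth.md' },
  content: 'Sessions are issued as signed JWTs with a 15-minute TTL...',
  score: 0.91,
}] as unknown as SearchResult[];

// Assume a 2048-token window and reserve 256 tokens for the answer:
// with the default promptOverhead of 100, 2048 - 256 - 100 = 1692.
const budget = calculateContextBudget(2048, 256);

const built = buildPrompt({
  query: 'How does auth work?',
  chunks,
  modelType: 'instruct', // selects the SmolLM2 chat-template path
  maxContextLength: 2048,
  reservedOutputTokens: 256,
});

console.log(budget, built.estimatedTokens, built.contextInfo.truncated);
```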
core/prompt-templates.js (new file):

@@ -0,0 +1,225 @@
+/**
+ * CORE MODULE — Prompt Templates for RAG Response Generation
+ *
+ * Provides prompt engineering utilities for different generator model types.
+ * Handles context formatting, token budget management, and system prompts.
+ *
+ * PROMPT STRATEGIES:
+ * - Instruct models: Use chat template with system/user/assistant roles
+ * - Causal LM models: Use simple document + question format
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+// =============================================================================
+// DEFAULT PROMPTS
+// =============================================================================
+/**
+ * Default system prompt for instruct models
+ * Emphasizes grounded responses using only provided context
+ */
+export const DEFAULT_SYSTEM_PROMPT = `You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:
+
+1. Answer ONLY using information found in the context documents
+2. If the answer cannot be found in the context, say "I cannot find this information in the provided documents"
+3. Do not make up information or use external knowledge
+4. Be concise and direct in your response
+5. If the context is incomplete or unclear, acknowledge this limitation`;
+/**
+ * Default system prompt for RAG with source attribution
+ */
+export const DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION = `You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:
+
+1. Answer ONLY using information found in the context documents
+2. When possible, mention which document the information comes from
+3. If the answer cannot be found in the context, say "I cannot find this information in the provided documents"
+4. Do not make up information or use external knowledge
+5. Be concise and direct in your response`;
+// =============================================================================
+// CHAT TEMPLATES
+// =============================================================================
+/**
+ * SmolLM2 chat template format
+ * Uses <|im_start|> and <|im_end|> tokens
+ */
+export const SMOLLM2_CHAT_TEMPLATE = {
+    systemStart: '<|im_start|>system\n',
+    systemEnd: '<|im_end|>\n',
+    userStart: '<|im_start|>user\n',
+    userEnd: '<|im_end|>\n',
+    assistantStart: '<|im_start|>assistant\n',
+    assistantEnd: '<|im_end|>',
+    endOfText: '<|endoftext|>'
+};
+/**
+ * Format search result chunks into context string for the prompt
+ *
+ * @param chunks - Search result chunks to format
+ * @param options - Formatting options
+ * @returns Formatted context with metadata
+ */
+export function formatContextChunks(chunks, options) {
+    const { maxContextTokens, includeDocumentInfo = true, includeScores = false, chunkSeparator = '\n---\n', tokenEstimationRatio = 4 // ~4 chars per token for English
+     } = options;
+    const maxChars = maxContextTokens * tokenEstimationRatio;
+    let currentChars = 0;
+    const includedChunks = [];
+    let truncated = false;
+    for (let i = 0; i < chunks.length; i++) {
+        const chunk = chunks[i];
+        // Format this chunk
+        let chunkText = '';
+        if (includeDocumentInfo) {
+            chunkText += `[Document ${i + 1}: ${chunk.document.title}]`;
+            if (includeScores) {
+                chunkText += ` (relevance: ${(chunk.score * 100).toFixed(1)}%)`;
+            }
+            chunkText += '\n';
+        }
+        chunkText += chunk.content;
+        // Check if adding this chunk would exceed budget
+        const chunkChars = chunkText.length + (includedChunks.length > 0 ? chunkSeparator.length : 0);
+        if (currentChars + chunkChars > maxChars) {
+            // Check if we can fit a truncated version of this chunk
+            const remainingChars = maxChars - currentChars - (includedChunks.length > 0 ? chunkSeparator.length : 0);
+            if (remainingChars > 100 && includedChunks.length === 0) {
+                // Truncate the first chunk if it's the only option
+                chunkText = chunkText.substring(0, remainingChars - 20) + '\n[Content truncated...]';
+                includedChunks.push(chunkText);
+                currentChars += chunkText.length;
+            }
+            truncated = true;
+            break;
+        }
+        includedChunks.push(chunkText);
+        currentChars += chunkChars;
+    }
+    const text = includedChunks.join(chunkSeparator);
+    const estimatedTokens = Math.ceil(text.length / tokenEstimationRatio);
+    return {
+        text,
+        estimatedTokens,
+        chunksIncluded: includedChunks.length,
+        totalChunks: chunks.length,
+        truncated
+    };
+}
+/**
+ * Build a complete prompt for the generator model
+ *
+ * @param options - Prompt building options
+ * @returns Built prompt with metadata
+ */
+export function buildPrompt(options) {
+    const { query, chunks, modelType, systemPrompt, maxContextLength, reservedOutputTokens, includeSourceAttribution = false } = options;
+    // Calculate available tokens for context
+    const promptOverhead = modelType === 'instruct' ? 150 : 50; // Tokens for formatting
+    const queryTokens = Math.ceil(query.length / 4);
+    const availableContextTokens = maxContextLength - reservedOutputTokens - promptOverhead - queryTokens;
+    // Format context chunks
+    const contextInfo = formatContextChunks(chunks, {
+        maxContextTokens: availableContextTokens,
+        includeDocumentInfo: true,
+        includeScores: false
+    });
+    // Build prompt based on model type
+    let prompt;
+    let systemPromptUsed;
+    if (modelType === 'instruct') {
+        prompt = buildInstructPrompt(query, contextInfo.text, systemPrompt, includeSourceAttribution);
+        systemPromptUsed = systemPrompt || (includeSourceAttribution ? DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION : DEFAULT_SYSTEM_PROMPT);
+    }
+    else {
+        prompt = buildCausalLMPrompt(query, contextInfo.text);
+    }
+    const estimatedTokens = Math.ceil(prompt.length / 4);
+    return {
+        prompt,
+        estimatedTokens,
+        contextInfo,
+        systemPromptUsed
+    };
+}
+/**
+ * Build prompt for instruct models (SmolLM2-Instruct)
+ * Uses chat template format with system/user/assistant roles
+ */
+function buildInstructPrompt(query, context, customSystemPrompt, includeSourceAttribution = false) {
+    const systemPrompt = customSystemPrompt ||
+        (includeSourceAttribution ? DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION : DEFAULT_SYSTEM_PROMPT);
+    const template = SMOLLM2_CHAT_TEMPLATE;
+    const userMessage = `Context:
+${context}
+
+Question: ${query}
+
+Answer based only on the context above:`;
+    return `${template.systemStart}${systemPrompt}${template.systemEnd}${template.userStart}${userMessage}${template.userEnd}${template.assistantStart}`;
+}
+/**
+ * Build prompt for causal LM models (DistilGPT2)
+ * Uses simple document + question format without roles
+ */
+function buildCausalLMPrompt(query, context) {
+    return `The following documents contain information to answer the question.
+
+Documents:
+${context}
+
+Based on the documents above, answer this question: ${query}
+
+Answer:`;
+}
+// =============================================================================
+// TOKEN ESTIMATION
+// =============================================================================
+/**
+ * Estimate token count for a string
+ * Uses a simple character-based heuristic (~4 chars per token for English)
+ *
+ * @param text - Text to estimate tokens for
+ * @returns Estimated token count
+ */
+export function estimateTokenCount(text) {
+    // Simple heuristic: ~4 characters per token for English text
+    // This is a rough approximation; actual tokenization varies by model
+    return Math.ceil(text.length / 4);
+}
+/**
+ * Calculate available context budget
+ *
+ * @param maxContextLength - Maximum context window size
+ * @param reservedOutputTokens - Tokens reserved for generation
+ * @param promptOverhead - Tokens used by prompt formatting
+ * @returns Available tokens for context chunks
+ */
+export function calculateContextBudget(maxContextLength, reservedOutputTokens, promptOverhead = 100) {
+    return Math.max(0, maxContextLength - reservedOutputTokens - promptOverhead);
+}
+// =============================================================================
+// STOP SEQUENCES
+// =============================================================================
+/**
+ * Get default stop sequences for a model type
+ *
+ * @param modelType - Generator model type
+ * @returns Array of stop sequences
+ */
+export function getDefaultStopSequences(modelType) {
+    if (modelType === 'instruct') {
+        return [
+            SMOLLM2_CHAT_TEMPLATE.assistantEnd,
+            SMOLLM2_CHAT_TEMPLATE.endOfText,
+            '<|im_start|>',
+            '\n\nQuestion:',
+            '\n\nContext:'
+        ];
+    }
+    // Causal LM stop sequences
+    return [
+        '\n\nQuestion:',
+        '\n\nDocuments:',
+        '\n\n---',
+        '<|endoftext|>'
+    ];
+}
+//# sourceMappingURL=prompt-templates.js.map
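Note the budgeting detail in formatContextChunks: it works in characters (maxContextTokens × tokenEstimationRatio) and drops any chunk that would overflow, except that the very first chunk is kept in truncated form when more than 100 characters remain. A sketch of that boundary case, with the same hedges as above (root re-export assumed, stand-in chunk shape):

```typescript
import { formatContextChunks } from 'rag-lite-ts';

// Stand-in for a real SearchResult.
const bigChunk = {
  document: { title: 'spec.md' },
  content: 'x'.repeat(5_000),
  score: 0.8,
} as any;

// Budget: 200 tokens × 4 chars/token = 800 chars. The ~5,000-char chunk
// overflows, but as the first (and only) chunk it is cut to fit and
// suffixed with '\n[Content truncated...]'.
const ctx = formatContextChunks([bigChunk], { maxContextTokens: 200 });

console.log(ctx.truncated);       // true
console.log(ctx.chunksIncluded);  // 1
console.log(ctx.estimatedTokens); // ≈ 200
```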
core/response-generator.d.ts (new file):

@@ -0,0 +1,132 @@
+/**
+ * CORE MODULE — Response Generator Interface for RAG Response Generation
+ *
+ * Model-agnostic interfaces supporting text generation from retrieved context.
+ * Designed for runtime polymorphism and extensibility, following the same
+ * patterns established by the UniversalEmbedder interface.
+ *
+ * SUPPORTED MODELS:
+ * - HuggingFaceTB/SmolLM2-135M-Instruct (instruct, balanced, recommended, 3 chunks default)
+ * - HuggingFaceTB/SmolLM2-360M-Instruct (instruct, higher quality, 5 chunks default)
+ *
+ * PREREQUISITES:
+ * - Reranking must be enabled for response generation
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+import type { SearchResult } from './types.js';
+/** Supported generator model types */
+export type GeneratorModelType = 'causal-lm' | 'instruct';
+/** Generation request containing query and retrieved context */
+export interface GenerationRequest {
+    query: string;
+    chunks: SearchResult[];
+    systemPrompt?: string;
+    maxTokens?: number;
+    temperature?: number;
+    topP?: number;
+    topK?: number;
+    repetitionPenalty?: number;
+    stopSequences?: string[];
+    includeSourceAttribution?: boolean;
+    /** Maximum number of chunks to include in context (overrides model default) */
+    maxChunksForContext?: number;
+}
+/** Result of text generation */
+export interface GenerationResult {
+    response: string;
+    tokensUsed: number;
+    truncated: boolean;
+    modelName: string;
+    generationTimeMs: number;
+    metadata: {
+        promptTokens: number;
+        completionTokens: number;
+        chunksIncluded: number;
+        totalChunks: number;
+        finishReason: 'complete' | 'length' | 'stop_sequence' | 'error';
+    };
+}
+/** Generator model capabilities */
+export interface GeneratorCapabilities {
+    supportsStreaming: boolean;
+    supportsSystemPrompt: boolean;
+    instructionTuned: boolean;
+    maxContextLength: number;
+    defaultMaxOutputTokens: number;
+    recommendedTemperature: number;
+    /** Maximum number of chunks to use for context (default varies by model) */
+    defaultMaxChunksForContext: number;
+}
+/** Generator model requirements */
+export interface GeneratorRequirements {
+    transformersJsVersion: string;
+    minimumMemory: number;
+    requiredFeatures: readonly string[];
+    platformSupport: readonly string[];
+}
+/** Complete generator model information */
+export interface GeneratorModelInfo {
+    name: string;
+    type: GeneratorModelType;
+    version: string;
+    capabilities: GeneratorCapabilities;
+    requirements: GeneratorRequirements;
+    isDefault?: boolean;
+    description?: string;
+}
+/** Generator validation result */
+export interface GeneratorValidationResult {
+    isValid: boolean;
+    errors: string[];
+    warnings: string[];
+    suggestions: string[];
+}
+/** Options for creating generator instances */
+export interface GeneratorCreationOptions {
+    cachePath?: string;
+    timeout?: number;
+    enableGPU?: boolean;
+    defaultGenerationOptions?: Partial<GenerationRequest>;
+    customConfig?: Record<string, any>;
+}
+/**
+ * Universal response generator interface
+ * @experimental This feature is experimental and may change in future versions.
+ */
+export interface ResponseGenerator {
+    readonly modelName: string;
+    readonly modelType: GeneratorModelType;
+    readonly maxContextLength: number;
+    readonly maxOutputLength: number;
+    generate(request: GenerationRequest): Promise<GenerationResult>;
+    generateStream?(request: GenerationRequest): AsyncIterable<string>;
+    loadModel(): Promise<void>;
+    isLoaded(): boolean;
+    getModelInfo(): GeneratorModelInfo;
+    cleanup(): Promise<void>;
+}
+export type GenerateFunction = (query: string, chunks: SearchResult[], options?: Partial<GenerationRequest>) => Promise<GenerationResult>;
+export type CreateGeneratorFunction = (modelName: string, options?: GeneratorCreationOptions) => Promise<ResponseGenerator>;
+export declare class GeneratorValidationError extends Error {
+    readonly modelName: string;
+    readonly availableModels: readonly string[];
+    constructor(modelName: string, availableModels: readonly string[], message: string);
+}
+export declare class GenerationError extends Error {
+    readonly modelName: string;
+    readonly stage: 'loading' | 'tokenization' | 'generation' | 'decoding';
+    readonly cause?: Error | undefined;
+    constructor(modelName: string, stage: 'loading' | 'tokenization' | 'generation' | 'decoding', message: string, cause?: Error | undefined);
+}
+export declare class ContextWindowError extends Error {
+    readonly requiredTokens: number;
+    readonly availableTokens: number;
+    constructor(requiredTokens: number, availableTokens: number, message: string);
+}
+export declare function supportsStreaming(generator: ResponseGenerator): generator is ResponseGenerator & {
+    generateStream(request: GenerationRequest): AsyncIterable<string>;
+};
+export declare function isInstructModel(generator: ResponseGenerator): boolean;
+export declare function createGenerateFunction(generator: ResponseGenerator): GenerateFunction;
+//# sourceMappingURL=response-generator.d.ts.map
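Since generateStream is optional on ResponseGenerator, callers should branch on the supportsStreaming type guard rather than probing the method themselves. A consumption sketch, assuming these helpers are re-exported from the package root; how the generator instance is created (presumably via the generator factory added in this release) is out of scope here:

```typescript
import { supportsStreaming, createGenerateFunction } from 'rag-lite-ts';
import type { ResponseGenerator, SearchResult } from 'rag-lite-ts';

async function answer(generator: ResponseGenerator, query: string, chunks: SearchResult[]) {
  if (!generator.isLoaded()) {
    await generator.loadModel();
  }
  if (supportsStreaming(generator)) {
    // The guard narrows the type, so generateStream is no longer optional.
    for await (const piece of generator.generateStream({ query, chunks })) {
      process.stdout.write(piece);
    }
    return;
  }
  // Fallback: one-shot generation via the bound convenience function.
  const generate = createGenerateFunction(generator);
  const result = await generate(query, chunks, { maxTokens: 256 });
  console.log(result.response, `(${result.metadata.finishReason})`);
}
```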
core/response-generator.js (new file):

@@ -0,0 +1,69 @@
+/**
+ * CORE MODULE — Response Generator Interface for RAG Response Generation
+ *
+ * Model-agnostic interfaces supporting text generation from retrieved context.
+ * Designed for runtime polymorphism and extensibility, following the same
+ * patterns established by the UniversalEmbedder interface.
+ *
+ * SUPPORTED MODELS:
+ * - HuggingFaceTB/SmolLM2-135M-Instruct (instruct, balanced, recommended, 3 chunks default)
+ * - HuggingFaceTB/SmolLM2-360M-Instruct (instruct, higher quality, 5 chunks default)
+ *
+ * PREREQUISITES:
+ * - Reranking must be enabled for response generation
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+// =============================================================================
+// ERROR CLASSES
+// =============================================================================
+export class GeneratorValidationError extends Error {
+    modelName;
+    availableModels;
+    constructor(modelName, availableModels, message) {
+        super(message);
+        this.modelName = modelName;
+        this.availableModels = availableModels;
+        this.name = 'GeneratorValidationError';
+    }
+}
+export class GenerationError extends Error {
+    modelName;
+    stage;
+    cause;
+    constructor(modelName, stage, message, cause) {
+        super(message);
+        this.modelName = modelName;
+        this.stage = stage;
+        this.cause = cause;
+        this.name = 'GenerationError';
+    }
+}
+export class ContextWindowError extends Error {
+    requiredTokens;
+    availableTokens;
+    constructor(requiredTokens, availableTokens, message) {
+        super(message);
+        this.requiredTokens = requiredTokens;
+        this.availableTokens = availableTokens;
+        this.name = 'ContextWindowError';
+    }
+}
+// =============================================================================
+// UTILITY FUNCTIONS
+// =============================================================================
+export function supportsStreaming(generator) {
+    return typeof generator.generateStream === 'function';
+}
+export function isInstructModel(generator) {
+    return generator.modelType === 'instruct';
+}
+export function createGenerateFunction(generator) {
+    return async (query, chunks, options) => {
+        if (!generator.isLoaded()) {
+            await generator.loadModel();
+        }
+        return generator.generate({ query, chunks, ...options });
+    };
+}
+//# sourceMappingURL=response-generator.js.map
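Each error class sets name and carries structured fields, so callers can branch with instanceof instead of parsing messages. A handling sketch under the same root-export assumption, with generate as produced by createGenerateFunction above:

```typescript
import { ContextWindowError, GenerationError } from 'rag-lite-ts';
import type { GenerateFunction, SearchResult } from 'rag-lite-ts';

async function safeAnswer(generate: GenerateFunction, query: string, chunks: SearchResult[]) {
  try {
    return (await generate(query, chunks)).response;
  } catch (err) {
    if (err instanceof ContextWindowError) {
      // Too much context: the error reports the overflow, so retry smaller.
      console.error(`Context overflow: need ${err.requiredTokens}, have ${err.availableTokens}`);
      return (await generate(query, chunks.slice(0, 1))).response;
    }
    if (err instanceof GenerationError) {
      // stage pinpoints the failing phase: loading | tokenization | generation | decoding.
      console.error(`${err.modelName} failed during ${err.stage}:`, err.cause ?? err.message);
      return undefined;
    }
    throw err;
  }
}
```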
core/search.d.ts:

@@ -6,17 +6,54 @@ import { IndexManager } from '../index-manager.js';
 import { DatabaseConnection } from './db.js';
 import type { SearchResult, SearchOptions } from './types.js';
 import type { EmbedFunction, RerankFunction } from './interfaces.js';
+import type { GenerateFunction } from './response-generator.js';
 /**
  * Search engine that provides semantic search capabilities
  * Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
  * Uses explicit dependency injection for clean architecture
  */
+/**
+ * Extended search options with generation support
+ * @experimental Generation features are experimental
+ */
+export interface ExtendedSearchOptions extends SearchOptions {
+    /** Enable AI response generation from search results */
+    generateResponse?: boolean;
+    /** Generator model to use (default: SmolLM2-135M-Instruct) */
+    generatorModel?: string;
+    /** Generation options */
+    generationOptions?: {
+        maxTokens?: number;
+        temperature?: number;
+        systemPrompt?: string;
+        /** Maximum chunks to use for context (overrides model default) */
+        maxChunksForContext?: number;
+    };
+}
+/**
+ * Search result with optional generated response
+ * @experimental Generation features are experimental
+ */
+export interface SearchResultWithGeneration {
+    /** Search results (chunks) */
+    results: SearchResult[];
+    /** Generated response (if generation was enabled) */
+    generation?: {
+        response: string;
+        modelUsed: string;
+        tokensUsed: number;
+        truncated: boolean;
+        chunksUsedForContext: number;
+        generationTimeMs: number;
+    };
+}
 export declare class SearchEngine {
     private embedFn;
     private indexManager;
     private db;
     private rerankFn?;
     private contentResolver?;
+    private generateFn?;
     /**
      * Creates a new SearchEngine with explicit dependency injection
      *
@@ -71,7 +108,41 @@ export declare class SearchEngine {
      * const search = new SearchEngine(customEmbedFn, indexManager, db);
      * ```
      */
-    constructor(embedFn: EmbedFunction, indexManager: IndexManager, db: DatabaseConnection, rerankFn?: RerankFunction | undefined, contentResolver?: import('./content-resolver.js').ContentResolver);
+    constructor(embedFn: EmbedFunction, indexManager: IndexManager, db: DatabaseConnection, rerankFn?: RerankFunction | undefined, contentResolver?: import('./content-resolver.js').ContentResolver, generateFn?: GenerateFunction);
+    /**
+     * Set or update the generate function
+     * @experimental This method is experimental and may change
+     */
+    setGenerateFunction(generateFn: GenerateFunction | undefined): void;
+    /**
+     * Check if generation is available
+     * @experimental This method is experimental and may change
+     */
+    hasGenerationCapability(): boolean;
+    /**
+     * Perform semantic search with optional AI response generation
+     *
+     * This method extends the standard search with optional response generation.
+     * When generation is enabled, the retrieved chunks are used as context for
+     * an AI model to generate a synthesized response.
+     *
+     * @param query - Search query string
+     * @param options - Extended search options including generation settings
+     * @returns Promise resolving to search results with optional generated response
+     *
+     * @example
+     * ```typescript
+     * // Search with generation
+     * const result = await search.searchWithGeneration('How does auth work?', {
+     *   top_k: 5,
+     *   generateResponse: true
+     * });
+     * console.log(result.generation?.response);
+     * ```
+     *
+     * @experimental This method is experimental and may change in future versions.
+     */
+    searchWithGeneration(query: string, options?: ExtendedSearchOptions): Promise<SearchResultWithGeneration>;
     /**
      * Perform semantic search on the indexed documents
      * Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking