rag-lite-ts 2.3.0 → 2.3.1

This diff compares two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
Files changed (65)
  1. package/dist/cjs/cli/search.js +77 -2
  2. package/dist/cjs/cli.js +28 -1
  3. package/dist/cjs/core/abstract-generator.d.ts +97 -0
  4. package/dist/cjs/core/abstract-generator.js +222 -0
  5. package/dist/cjs/core/binary-index-format.js +47 -7
  6. package/dist/cjs/core/generator-registry.d.ts +114 -0
  7. package/dist/cjs/core/generator-registry.js +280 -0
  8. package/dist/cjs/core/index.d.ts +4 -0
  9. package/dist/cjs/core/index.js +11 -0
  10. package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
  11. package/dist/cjs/core/lazy-dependency-loader.js +111 -2
  12. package/dist/cjs/core/prompt-templates.d.ts +138 -0
  13. package/dist/cjs/core/prompt-templates.js +225 -0
  14. package/dist/cjs/core/response-generator.d.ts +132 -0
  15. package/dist/cjs/core/response-generator.js +69 -0
  16. package/dist/cjs/core/search.d.ts +72 -1
  17. package/dist/cjs/core/search.js +79 -6
  18. package/dist/cjs/core/types.d.ts +1 -0
  19. package/dist/cjs/core/vector-index-worker.js +10 -0
  20. package/dist/cjs/core/vector-index.js +69 -19
  21. package/dist/cjs/factories/generator-factory.d.ts +88 -0
  22. package/dist/cjs/factories/generator-factory.js +151 -0
  23. package/dist/cjs/factories/index.d.ts +1 -0
  24. package/dist/cjs/factories/index.js +5 -0
  25. package/dist/cjs/index.d.ts +9 -0
  26. package/dist/cjs/index.js +16 -0
  27. package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
  28. package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
  29. package/dist/cjs/text/generators/index.d.ts +10 -0
  30. package/dist/cjs/text/generators/index.js +10 -0
  31. package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
  32. package/dist/cjs/text/generators/instruct-generator.js +192 -0
  33. package/dist/esm/cli/search.js +77 -2
  34. package/dist/esm/cli.js +28 -1
  35. package/dist/esm/core/abstract-generator.d.ts +97 -0
  36. package/dist/esm/core/abstract-generator.js +222 -0
  37. package/dist/esm/core/binary-index-format.js +47 -7
  38. package/dist/esm/core/generator-registry.d.ts +114 -0
  39. package/dist/esm/core/generator-registry.js +280 -0
  40. package/dist/esm/core/index.d.ts +4 -0
  41. package/dist/esm/core/index.js +11 -0
  42. package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
  43. package/dist/esm/core/lazy-dependency-loader.js +111 -2
  44. package/dist/esm/core/prompt-templates.d.ts +138 -0
  45. package/dist/esm/core/prompt-templates.js +225 -0
  46. package/dist/esm/core/response-generator.d.ts +132 -0
  47. package/dist/esm/core/response-generator.js +69 -0
  48. package/dist/esm/core/search.d.ts +72 -1
  49. package/dist/esm/core/search.js +79 -6
  50. package/dist/esm/core/types.d.ts +1 -0
  51. package/dist/esm/core/vector-index-worker.js +10 -0
  52. package/dist/esm/core/vector-index.js +69 -19
  53. package/dist/esm/factories/generator-factory.d.ts +88 -0
  54. package/dist/esm/factories/generator-factory.js +151 -0
  55. package/dist/esm/factories/index.d.ts +1 -0
  56. package/dist/esm/factories/index.js +5 -0
  57. package/dist/esm/index.d.ts +9 -0
  58. package/dist/esm/index.js +16 -0
  59. package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
  60. package/dist/esm/text/generators/causal-lm-generator.js +197 -0
  61. package/dist/esm/text/generators/index.d.ts +10 -0
  62. package/dist/esm/text/generators/index.js +10 -0
  63. package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
  64. package/dist/esm/text/generators/instruct-generator.js +192 -0
  65. package/package.json +1 -1
package/dist/esm/core/prompt-templates.js
@@ -0,0 +1,225 @@
+ /**
+  * CORE MODULE — Prompt Templates for RAG Response Generation
+  *
+  * Provides prompt engineering utilities for different generator model types.
+  * Handles context formatting, token budget management, and system prompts.
+  *
+  * PROMPT STRATEGIES:
+  * - Instruct models: Use chat template with system/user/assistant roles
+  * - Causal LM models: Use simple document + question format
+  *
+  * @experimental This feature is experimental and may change in future versions.
+  */
+ // =============================================================================
+ // DEFAULT PROMPTS
+ // =============================================================================
+ /**
+  * Default system prompt for instruct models
+  * Emphasizes grounded responses using only provided context
+  */
+ export const DEFAULT_SYSTEM_PROMPT = `You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:
+
+ 1. Answer ONLY using information found in the context documents
+ 2. If the answer cannot be found in the context, say "I cannot find this information in the provided documents"
+ 3. Do not make up information or use external knowledge
+ 4. Be concise and direct in your response
+ 5. If the context is incomplete or unclear, acknowledge this limitation`;
+ /**
+  * Default system prompt for RAG with source attribution
+  */
+ export const DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION = `You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:
+
+ 1. Answer ONLY using information found in the context documents
+ 2. When possible, mention which document the information comes from
+ 3. If the answer cannot be found in the context, say "I cannot find this information in the provided documents"
+ 4. Do not make up information or use external knowledge
+ 5. Be concise and direct in your response`;
+ // =============================================================================
+ // CHAT TEMPLATES
+ // =============================================================================
+ /**
+  * SmolLM2 chat template format
+  * Uses <|im_start|> and <|im_end|> tokens
+  */
+ export const SMOLLM2_CHAT_TEMPLATE = {
+     systemStart: '<|im_start|>system\n',
+     systemEnd: '<|im_end|>\n',
+     userStart: '<|im_start|>user\n',
+     userEnd: '<|im_end|>\n',
+     assistantStart: '<|im_start|>assistant\n',
+     assistantEnd: '<|im_end|>',
+     endOfText: '<|endoftext|>'
+ };
+ /**
+  * Format search result chunks into context string for the prompt
+  *
+  * @param chunks - Search result chunks to format
+  * @param options - Formatting options
+  * @returns Formatted context with metadata
+  */
+ export function formatContextChunks(chunks, options) {
+     const { maxContextTokens, includeDocumentInfo = true, includeScores = false, chunkSeparator = '\n---\n', tokenEstimationRatio = 4 // ~4 chars per token for English
+      } = options;
+     const maxChars = maxContextTokens * tokenEstimationRatio;
+     let currentChars = 0;
+     const includedChunks = [];
+     let truncated = false;
+     for (let i = 0; i < chunks.length; i++) {
+         const chunk = chunks[i];
+         // Format this chunk
+         let chunkText = '';
+         if (includeDocumentInfo) {
+             chunkText += `[Document ${i + 1}: ${chunk.document.title}]`;
+             if (includeScores) {
+                 chunkText += ` (relevance: ${(chunk.score * 100).toFixed(1)}%)`;
+             }
+             chunkText += '\n';
+         }
+         chunkText += chunk.content;
+         // Check if adding this chunk would exceed budget
+         const chunkChars = chunkText.length + (includedChunks.length > 0 ? chunkSeparator.length : 0);
+         if (currentChars + chunkChars > maxChars) {
+             // Check if we can fit a truncated version of this chunk
+             const remainingChars = maxChars - currentChars - (includedChunks.length > 0 ? chunkSeparator.length : 0);
+             if (remainingChars > 100 && includedChunks.length === 0) {
+                 // Truncate the first chunk if it's the only option
+                 chunkText = chunkText.substring(0, remainingChars - 20) + '\n[Content truncated...]';
+                 includedChunks.push(chunkText);
+                 currentChars += chunkText.length;
+             }
+             truncated = true;
+             break;
+         }
+         includedChunks.push(chunkText);
+         currentChars += chunkChars;
+     }
+     const text = includedChunks.join(chunkSeparator);
+     const estimatedTokens = Math.ceil(text.length / tokenEstimationRatio);
+     return {
+         text,
+         estimatedTokens,
+         chunksIncluded: includedChunks.length,
+         totalChunks: chunks.length,
+         truncated
+     };
+ }
+ /**
+  * Build a complete prompt for the generator model
+  *
+  * @param options - Prompt building options
+  * @returns Built prompt with metadata
+  */
+ export function buildPrompt(options) {
+     const { query, chunks, modelType, systemPrompt, maxContextLength, reservedOutputTokens, includeSourceAttribution = false } = options;
+     // Calculate available tokens for context
+     const promptOverhead = modelType === 'instruct' ? 150 : 50; // Tokens for formatting
+     const queryTokens = Math.ceil(query.length / 4);
+     const availableContextTokens = maxContextLength - reservedOutputTokens - promptOverhead - queryTokens;
+     // Format context chunks
+     const contextInfo = formatContextChunks(chunks, {
+         maxContextTokens: availableContextTokens,
+         includeDocumentInfo: true,
+         includeScores: false
+     });
+     // Build prompt based on model type
+     let prompt;
+     let systemPromptUsed;
+     if (modelType === 'instruct') {
+         prompt = buildInstructPrompt(query, contextInfo.text, systemPrompt, includeSourceAttribution);
+         systemPromptUsed = systemPrompt || (includeSourceAttribution ? DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION : DEFAULT_SYSTEM_PROMPT);
+     }
+     else {
+         prompt = buildCausalLMPrompt(query, contextInfo.text);
+     }
+     const estimatedTokens = Math.ceil(prompt.length / 4);
+     return {
+         prompt,
+         estimatedTokens,
+         contextInfo,
+         systemPromptUsed
+     };
+ }
+ /**
+  * Build prompt for instruct models (SmolLM2-Instruct)
+  * Uses chat template format with system/user/assistant roles
+  */
+ function buildInstructPrompt(query, context, customSystemPrompt, includeSourceAttribution = false) {
+     const systemPrompt = customSystemPrompt ||
+         (includeSourceAttribution ? DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION : DEFAULT_SYSTEM_PROMPT);
+     const template = SMOLLM2_CHAT_TEMPLATE;
+     const userMessage = `Context:
+ ${context}
+
+ Question: ${query}
+
+ Answer based only on the context above:`;
+     return `${template.systemStart}${systemPrompt}${template.systemEnd}${template.userStart}${userMessage}${template.userEnd}${template.assistantStart}`;
+ }
+ /**
+  * Build prompt for causal LM models (DistilGPT2)
+  * Uses simple document + question format without roles
+  */
+ function buildCausalLMPrompt(query, context) {
+     return `The following documents contain information to answer the question.
+
+ Documents:
+ ${context}
+
+ Based on the documents above, answer this question: ${query}
+
+ Answer:`;
+ }
+ // =============================================================================
+ // TOKEN ESTIMATION
+ // =============================================================================
+ /**
+  * Estimate token count for a string
+  * Uses a simple character-based heuristic (~4 chars per token for English)
+  *
+  * @param text - Text to estimate tokens for
+  * @returns Estimated token count
+  */
+ export function estimateTokenCount(text) {
+     // Simple heuristic: ~4 characters per token for English text
+     // This is a rough approximation; actual tokenization varies by model
+     return Math.ceil(text.length / 4);
+ }
+ /**
+  * Calculate available context budget
+  *
+  * @param maxContextLength - Maximum context window size
+  * @param reservedOutputTokens - Tokens reserved for generation
+  * @param promptOverhead - Tokens used by prompt formatting
+  * @returns Available tokens for context chunks
+  */
+ export function calculateContextBudget(maxContextLength, reservedOutputTokens, promptOverhead = 100) {
+     return Math.max(0, maxContextLength - reservedOutputTokens - promptOverhead);
+ }
+ // =============================================================================
+ // STOP SEQUENCES
+ // =============================================================================
+ /**
+  * Get default stop sequences for a model type
+  *
+  * @param modelType - Generator model type
+  * @returns Array of stop sequences
+  */
+ export function getDefaultStopSequences(modelType) {
+     if (modelType === 'instruct') {
+         return [
+             SMOLLM2_CHAT_TEMPLATE.assistantEnd,
+             SMOLLM2_CHAT_TEMPLATE.endOfText,
+             '<|im_start|>',
+             '\n\nQuestion:',
+             '\n\nContext:'
+         ];
+     }
+     // Causal LM stop sequences
+     return [
+         '\n\nQuestion:',
+         '\n\nDocuments:',
+         '\n\n---',
+         '<|endoftext|>'
+     ];
+ }
+ //# sourceMappingURL=prompt-templates.js.map
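
Taken together, `formatContextChunks`, `buildPrompt`, and `getDefaultStopSequences` form the prompt-assembly pipeline added in this release. A minimal usage sketch — the chunk shape (`content`, `score`, `document.title`) is inferred from the code above, the root import path assumes these helpers are re-exported from the package entry point, and the window sizes are illustrative rather than model defaults:

```typescript
import { buildPrompt, getDefaultStopSequences } from 'rag-lite-ts';

// Hypothetical retrieved chunks, shaped the way formatContextChunks reads them
const chunks = [
  { content: 'Auth uses short-lived JWTs signed with RS256.', score: 0.91, document: { title: 'auth.md' } },
  { content: 'Refresh tokens rotate on every use.', score: 0.84, document: { title: 'sessions.md' } },
];

const built = buildPrompt({
  query: 'How does auth work?',
  chunks,
  modelType: 'instruct',     // selects the SmolLM2 chat-template path
  maxContextLength: 2048,    // illustrative context window
  reservedOutputTokens: 256, // budget kept free for the completion
});

console.log(built.estimatedTokens, built.contextInfo.truncated);
console.log(getDefaultStopSequences('instruct')); // ['<|im_end|>', '<|endoftext|>', ...]
```

Note that all token math here is the ~4-chars-per-token heuristic from `estimateTokenCount`, so budgets are approximate by design.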
package/dist/esm/core/response-generator.d.ts
@@ -0,0 +1,132 @@
+ /**
+  * CORE MODULE — Response Generator Interface for RAG Response Generation
+  *
+  * Model-agnostic interfaces supporting text generation from retrieved context.
+  * Designed for runtime polymorphism and extensibility, following the same
+  * patterns established by the UniversalEmbedder interface.
+  *
+  * SUPPORTED MODELS:
+  * - HuggingFaceTB/SmolLM2-135M-Instruct (instruct, balanced, recommended, 3 chunks default)
+  * - HuggingFaceTB/SmolLM2-360M-Instruct (instruct, higher quality, 5 chunks default)
+  *
+  * PREREQUISITES:
+  * - Reranking must be enabled for response generation
+  *
+  * @experimental This feature is experimental and may change in future versions.
+  */
+ import type { SearchResult } from './types.js';
+ /** Supported generator model types */
+ export type GeneratorModelType = 'causal-lm' | 'instruct';
+ /** Generation request containing query and retrieved context */
+ export interface GenerationRequest {
+     query: string;
+     chunks: SearchResult[];
+     systemPrompt?: string;
+     maxTokens?: number;
+     temperature?: number;
+     topP?: number;
+     topK?: number;
+     repetitionPenalty?: number;
+     stopSequences?: string[];
+     includeSourceAttribution?: boolean;
+     /** Maximum number of chunks to include in context (overrides model default) */
+     maxChunksForContext?: number;
+ }
+ /** Result of text generation */
+ export interface GenerationResult {
+     response: string;
+     tokensUsed: number;
+     truncated: boolean;
+     modelName: string;
+     generationTimeMs: number;
+     metadata: {
+         promptTokens: number;
+         completionTokens: number;
+         chunksIncluded: number;
+         totalChunks: number;
+         finishReason: 'complete' | 'length' | 'stop_sequence' | 'error';
+     };
+ }
+ /** Generator model capabilities */
+ export interface GeneratorCapabilities {
+     supportsStreaming: boolean;
+     supportsSystemPrompt: boolean;
+     instructionTuned: boolean;
+     maxContextLength: number;
+     defaultMaxOutputTokens: number;
+     recommendedTemperature: number;
+     /** Maximum number of chunks to use for context (default varies by model) */
+     defaultMaxChunksForContext: number;
+ }
+ /** Generator model requirements */
+ export interface GeneratorRequirements {
+     transformersJsVersion: string;
+     minimumMemory: number;
+     requiredFeatures: readonly string[];
+     platformSupport: readonly string[];
+ }
+ /** Complete generator model information */
+ export interface GeneratorModelInfo {
+     name: string;
+     type: GeneratorModelType;
+     version: string;
+     capabilities: GeneratorCapabilities;
+     requirements: GeneratorRequirements;
+     isDefault?: boolean;
+     description?: string;
+ }
+ /** Generator validation result */
+ export interface GeneratorValidationResult {
+     isValid: boolean;
+     errors: string[];
+     warnings: string[];
+     suggestions: string[];
+ }
+ /** Options for creating generator instances */
+ export interface GeneratorCreationOptions {
+     cachePath?: string;
+     timeout?: number;
+     enableGPU?: boolean;
+     defaultGenerationOptions?: Partial<GenerationRequest>;
+     customConfig?: Record<string, any>;
+ }
+ /**
+  * Universal response generator interface
+  * @experimental This feature is experimental and may change in future versions.
+  */
+ export interface ResponseGenerator {
+     readonly modelName: string;
+     readonly modelType: GeneratorModelType;
+     readonly maxContextLength: number;
+     readonly maxOutputLength: number;
+     generate(request: GenerationRequest): Promise<GenerationResult>;
+     generateStream?(request: GenerationRequest): AsyncIterable<string>;
+     loadModel(): Promise<void>;
+     isLoaded(): boolean;
+     getModelInfo(): GeneratorModelInfo;
+     cleanup(): Promise<void>;
+ }
+ export type GenerateFunction = (query: string, chunks: SearchResult[], options?: Partial<GenerationRequest>) => Promise<GenerationResult>;
+ export type CreateGeneratorFunction = (modelName: string, options?: GeneratorCreationOptions) => Promise<ResponseGenerator>;
+ export declare class GeneratorValidationError extends Error {
+     readonly modelName: string;
+     readonly availableModels: readonly string[];
+     constructor(modelName: string, availableModels: readonly string[], message: string);
+ }
+ export declare class GenerationError extends Error {
+     readonly modelName: string;
+     readonly stage: 'loading' | 'tokenization' | 'generation' | 'decoding';
+     readonly cause?: Error | undefined;
+     constructor(modelName: string, stage: 'loading' | 'tokenization' | 'generation' | 'decoding', message: string, cause?: Error | undefined);
+ }
+ export declare class ContextWindowError extends Error {
+     readonly requiredTokens: number;
+     readonly availableTokens: number;
+     constructor(requiredTokens: number, availableTokens: number, message: string);
+ }
+ export declare function supportsStreaming(generator: ResponseGenerator): generator is ResponseGenerator & {
+     generateStream(request: GenerationRequest): AsyncIterable<string>;
+ };
+ export declare function isInstructModel(generator: ResponseGenerator): boolean;
+ export declare function createGenerateFunction(generator: ResponseGenerator): GenerateFunction;
+ //# sourceMappingURL=response-generator.d.ts.map
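
Because `ResponseGenerator` is a plain interface, custom backends can be plugged in without touching the built-in generators. A hedged sketch of a trivial in-memory generator that satisfies the contract above — the class is hypothetical and the root import path is an assumption:

```typescript
import type { ResponseGenerator, GenerationRequest, GenerationResult, GeneratorModelInfo } from 'rag-lite-ts';

// Hypothetical test double: echoes the query instead of running a model
class EchoGenerator implements ResponseGenerator {
  readonly modelName = 'echo-test';
  readonly modelType = 'instruct' as const;
  readonly maxContextLength = 2048;
  readonly maxOutputLength = 256;
  private loaded = false;

  async loadModel() { this.loaded = true; }
  isLoaded() { return this.loaded; }
  async cleanup() { this.loaded = false; }

  getModelInfo(): GeneratorModelInfo {
    return {
      name: this.modelName,
      type: this.modelType,
      version: '0.0.0',
      capabilities: {
        supportsStreaming: false, supportsSystemPrompt: true, instructionTuned: true,
        maxContextLength: 2048, defaultMaxOutputTokens: 256,
        recommendedTemperature: 0.7, defaultMaxChunksForContext: 3,
      },
      requirements: { transformersJsVersion: 'n/a', minimumMemory: 0, requiredFeatures: [], platformSupport: ['node'] },
    };
  }

  async generate(request: GenerationRequest): Promise<GenerationResult> {
    const started = Date.now();
    const response = `Echo: ${request.query} (${request.chunks.length} chunks)`;
    return {
      response,
      tokensUsed: Math.ceil(response.length / 4), // same heuristic as estimateTokenCount
      truncated: false,
      modelName: this.modelName,
      generationTimeMs: Date.now() - started,
      metadata: {
        promptTokens: 0, completionTokens: 0,
        chunksIncluded: request.chunks.length, totalChunks: request.chunks.length,
        finishReason: 'complete',
      },
    };
  }
}
```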
package/dist/esm/core/response-generator.js
@@ -0,0 +1,69 @@
+ /**
+  * CORE MODULE — Response Generator Interface for RAG Response Generation
+  *
+  * Model-agnostic interfaces supporting text generation from retrieved context.
+  * Designed for runtime polymorphism and extensibility, following the same
+  * patterns established by the UniversalEmbedder interface.
+  *
+  * SUPPORTED MODELS:
+  * - HuggingFaceTB/SmolLM2-135M-Instruct (instruct, balanced, recommended, 3 chunks default)
+  * - HuggingFaceTB/SmolLM2-360M-Instruct (instruct, higher quality, 5 chunks default)
+  *
+  * PREREQUISITES:
+  * - Reranking must be enabled for response generation
+  *
+  * @experimental This feature is experimental and may change in future versions.
+  */
+ // =============================================================================
+ // ERROR CLASSES
+ // =============================================================================
+ export class GeneratorValidationError extends Error {
+     modelName;
+     availableModels;
+     constructor(modelName, availableModels, message) {
+         super(message);
+         this.modelName = modelName;
+         this.availableModels = availableModels;
+         this.name = 'GeneratorValidationError';
+     }
+ }
+ export class GenerationError extends Error {
+     modelName;
+     stage;
+     cause;
+     constructor(modelName, stage, message, cause) {
+         super(message);
+         this.modelName = modelName;
+         this.stage = stage;
+         this.cause = cause;
+         this.name = 'GenerationError';
+     }
+ }
+ export class ContextWindowError extends Error {
+     requiredTokens;
+     availableTokens;
+     constructor(requiredTokens, availableTokens, message) {
+         super(message);
+         this.requiredTokens = requiredTokens;
+         this.availableTokens = availableTokens;
+         this.name = 'ContextWindowError';
+     }
+ }
+ // =============================================================================
+ // UTILITY FUNCTIONS
+ // =============================================================================
+ export function supportsStreaming(generator) {
+     return typeof generator.generateStream === 'function';
+ }
+ export function isInstructModel(generator) {
+     return generator.modelType === 'instruct';
+ }
+ export function createGenerateFunction(generator) {
+     return async (query, chunks, options) => {
+         if (!generator.isLoaded()) {
+             await generator.loadModel();
+         }
+         return generator.generate({ query, chunks, ...options });
+     };
+ }
+ //# sourceMappingURL=response-generator.js.map
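
`createGenerateFunction` closes over a generator and lazy-loads it on first use, which matches what `SearchEngine` expects as its `generateFn`. A short sketch reusing the hypothetical `EchoGenerator` and `chunks` from the earlier examples:

```typescript
import { createGenerateFunction, supportsStreaming } from 'rag-lite-ts';

const generator = new EchoGenerator();              // hypothetical class from the sketch above
const generate = createGenerateFunction(generator); // loadModel() runs on the first call

const result = await generate('How does auth work?', chunks, { maxTokens: 128 });
console.log(result.response, result.metadata.finishReason);

// The type guard narrows to generators that actually implement generateStream
if (supportsStreaming(generator)) {
  for await (const token of generator.generateStream({ query: 'How does auth work?', chunks })) {
    process.stdout.write(token);
  }
}
```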
package/dist/esm/core/search.d.ts
@@ -6,17 +6,54 @@ import { IndexManager } from '../index-manager.js';
  import { DatabaseConnection } from './db.js';
  import type { SearchResult, SearchOptions } from './types.js';
  import type { EmbedFunction, RerankFunction } from './interfaces.js';
+ import type { GenerateFunction } from './response-generator.js';
  /**
   * Search engine that provides semantic search capabilities
   * Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
   * Uses explicit dependency injection for clean architecture
   */
+ /**
+  * Extended search options with generation support
+  * @experimental Generation features are experimental
+  */
+ export interface ExtendedSearchOptions extends SearchOptions {
+     /** Enable AI response generation from search results */
+     generateResponse?: boolean;
+     /** Generator model to use (default: SmolLM2-135M-Instruct) */
+     generatorModel?: string;
+     /** Generation options */
+     generationOptions?: {
+         maxTokens?: number;
+         temperature?: number;
+         systemPrompt?: string;
+         /** Maximum chunks to use for context (overrides model default) */
+         maxChunksForContext?: number;
+     };
+ }
+ /**
+  * Search result with optional generated response
+  * @experimental Generation features are experimental
+  */
+ export interface SearchResultWithGeneration {
+     /** Search results (chunks) */
+     results: SearchResult[];
+     /** Generated response (if generation was enabled) */
+     generation?: {
+         response: string;
+         modelUsed: string;
+         tokensUsed: number;
+         truncated: boolean;
+         chunksUsedForContext: number;
+         generationTimeMs: number;
+     };
+ }
  export declare class SearchEngine {
      private embedFn;
      private indexManager;
      private db;
      private rerankFn?;
      private contentResolver?;
+     private generateFn?;
      /**
       * Creates a new SearchEngine with explicit dependency injection
       *
@@ -71,7 +108,41 @@ export declare class SearchEngine {
       * const search = new SearchEngine(customEmbedFn, indexManager, db);
       * ```
       */
-     constructor(embedFn: EmbedFunction, indexManager: IndexManager, db: DatabaseConnection, rerankFn?: RerankFunction | undefined, contentResolver?: import('./content-resolver.js').ContentResolver);
+     constructor(embedFn: EmbedFunction, indexManager: IndexManager, db: DatabaseConnection, rerankFn?: RerankFunction | undefined, contentResolver?: import('./content-resolver.js').ContentResolver, generateFn?: GenerateFunction);
+     /**
+      * Set or update the generate function
+      * @experimental This method is experimental and may change
+      */
+     setGenerateFunction(generateFn: GenerateFunction | undefined): void;
+     /**
+      * Check if generation is available
+      * @experimental This method is experimental and may change
+      */
+     hasGenerationCapability(): boolean;
+     /**
+      * Perform semantic search with optional AI response generation
+      *
+      * This method extends the standard search with optional response generation.
+      * When generation is enabled, the retrieved chunks are used as context for
+      * an AI model to generate a synthesized response.
+      *
+      * @param query - Search query string
+      * @param options - Extended search options including generation settings
+      * @returns Promise resolving to search results with optional generated response
+      *
+      * @example
+      * ```typescript
+      * // Search with generation
+      * const result = await search.searchWithGeneration('How does auth work?', {
+      *   top_k: 5,
+      *   generateResponse: true
+      * });
+      * console.log(result.generation?.response);
+      * ```
+      *
+      * @experimental This method is experimental and may change in future versions.
+      */
+     searchWithGeneration(query: string, options?: ExtendedSearchOptions): Promise<SearchResultWithGeneration>;
      /**
       * Perform semantic search on the indexed documents
       * Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
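
Generation enters `SearchEngine` as a sixth, optional constructor dependency, so existing call sites keep working unchanged. A wiring sketch based on the declarations above — construction of `embedFn`, `indexManager`, and `db` is elided, and the generator comes from the hypothetical examples earlier:

```typescript
const search = new SearchEngine(embedFn, indexManager, db); // generation not wired yet

// Generation is opt-in: attach a GenerateFunction after construction
search.setGenerateFunction(createGenerateFunction(generator));
console.log(search.hasGenerationCapability()); // true

const { results, generation } = await search.searchWithGeneration('How does auth work?', {
  top_k: 5,
  generateResponse: true,
  generationOptions: { maxTokens: 200, temperature: 0.3 },
});
console.log(generation?.response ?? `no answer generated; ${results.length} chunks returned`);
```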
package/dist/esm/core/search.js
@@ -5,17 +5,13 @@
  import { getChunksByEmbeddingIds } from './db.js';
  import { config } from './config.js';
  import { createMissingDependencyError } from './actionable-error-messages.js';
- /**
-  * Search engine that provides semantic search capabilities
-  * Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
-  * Uses explicit dependency injection for clean architecture
-  */
  export class SearchEngine {
      embedFn;
      indexManager;
      db;
      rerankFn;
      contentResolver;
+     generateFn;
      /**
       * Creates a new SearchEngine with explicit dependency injection
       *
@@ -70,7 +66,7 @@ export class SearchEngine {
       * const search = new SearchEngine(customEmbedFn, indexManager, db);
       * ```
       */
-     constructor(embedFn, indexManager, db, rerankFn, contentResolver) {
+     constructor(embedFn, indexManager, db, rerankFn, contentResolver, generateFn) {
          this.embedFn = embedFn;
          this.indexManager = indexManager;
          this.db = db;
@@ -93,6 +89,83 @@ export class SearchEngine {
          }
          // Initialize ContentResolver if provided, or create lazily when needed
          this.contentResolver = contentResolver;
+         // Initialize GenerateFunction if provided (experimental)
+         this.generateFn = generateFn;
+     }
+     /**
+      * Set or update the generate function
+      * @experimental This method is experimental and may change
+      */
+     setGenerateFunction(generateFn) {
+         this.generateFn = generateFn;
+     }
+     /**
+      * Check if generation is available
+      * @experimental This method is experimental and may change
+      */
+     hasGenerationCapability() {
+         return this.generateFn !== undefined;
+     }
+     /**
+      * Perform semantic search with optional AI response generation
+      *
+      * This method extends the standard search with optional response generation.
+      * When generation is enabled, the retrieved chunks are used as context for
+      * an AI model to generate a synthesized response.
+      *
+      * @param query - Search query string
+      * @param options - Extended search options including generation settings
+      * @returns Promise resolving to search results with optional generated response
+      *
+      * @example
+      * ```typescript
+      * // Search with generation
+      * const result = await search.searchWithGeneration('How does auth work?', {
+      *   top_k: 5,
+      *   generateResponse: true
+      * });
+      * console.log(result.generation?.response);
+      * ```
+      *
+      * @experimental This method is experimental and may change in future versions.
+      */
+     async searchWithGeneration(query, options = {}) {
+         // Perform standard search
+         const results = await this.search(query, options);
+         // If generation not requested or no results, return without generation
+         if (!options.generateResponse || results.length === 0) {
+             return { results };
+         }
+         // Check if generation is available
+         if (!this.generateFn) {
+             console.warn('⚠️ [EXPERIMENTAL] Generation requested but no generator configured');
+             return { results };
+         }
+         try {
+             console.log('🤖 [EXPERIMENTAL] Generating response from search results...');
+             const generationResult = await this.generateFn(query, results, {
+                 maxTokens: options.generationOptions?.maxTokens,
+                 temperature: options.generationOptions?.temperature,
+                 systemPrompt: options.generationOptions?.systemPrompt,
+                 maxChunksForContext: options.generationOptions?.maxChunksForContext
+             });
+             return {
+                 results,
+                 generation: {
+                     response: generationResult.response,
+                     modelUsed: generationResult.modelName,
+                     tokensUsed: generationResult.tokensUsed,
+                     truncated: generationResult.truncated,
+                     chunksUsedForContext: generationResult.metadata.chunksIncluded,
+                     generationTimeMs: generationResult.generationTimeMs
+                 }
+             };
+         }
+         catch (error) {
+             console.error('❌ [EXPERIMENTAL] Generation failed:', error instanceof Error ? error.message : 'Unknown error');
+             // Return results without generation on error
+             return { results };
+         }
      }
      /**
       * Perform semantic search on the indexed documents
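
Note the failure semantics in the implementation above: a missing generator or a thrown generation error degrades to plain retrieval rather than rejecting, so callers must treat `generation` as optional. A short defensive-consumer sketch:

```typescript
const outcome = await search.searchWithGeneration(query, { generateResponse: true });
if (outcome.generation) {
  console.log(outcome.generation.response);
} else {
  // Fall back to raw chunks (generator unset, generation failed, or zero results)
  for (const r of outcome.results) console.log(r.content);
}
```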
package/dist/esm/core/types.d.ts
@@ -63,4 +63,5 @@ export interface Document {
  }
  export type { DatabaseConnection } from './db.js';
  export type { ContentChunk as ChunkResult } from './db.js';
+ export type { GenerationRequest, GenerationResult, GenerateFunction, ResponseGenerator, GeneratorModelInfo } from './response-generator.js';
  //# sourceMappingURL=types.d.ts.map
package/dist/esm/core/vector-index-worker.js
@@ -242,6 +242,16 @@
  function handleIndexExists(payload) {
      return existsSync(payload.indexPath);
  }
+ // Handle unhandled promise rejections to prevent worker crashes
+ process.on('unhandledRejection', (reason, promise) => {
+     console.error('Worker unhandled rejection:', reason);
+     // Don't exit - just log the error
+ });
+ // Handle uncaught exceptions
+ process.on('uncaughtException', (error) => {
+     console.error('Worker uncaught exception:', error);
+     // Don't exit - just log the error
+ });
  // Main message handler
  parentPort.on('message', async (request) => {
      try {