rag-lite-ts 2.2.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/README.md +88 -5
  2. package/dist/cjs/cli/indexer.js +73 -15
  3. package/dist/cjs/cli/search.js +77 -2
  4. package/dist/cjs/cli/ui-server.d.ts +5 -0
  5. package/dist/cjs/cli/ui-server.js +152 -0
  6. package/dist/cjs/cli.js +53 -7
  7. package/dist/cjs/core/abstract-generator.d.ts +97 -0
  8. package/dist/cjs/core/abstract-generator.js +222 -0
  9. package/dist/cjs/core/binary-index-format.js +53 -10
  10. package/dist/cjs/core/db.d.ts +56 -0
  11. package/dist/cjs/core/db.js +105 -0
  12. package/dist/cjs/core/generator-registry.d.ts +114 -0
  13. package/dist/cjs/core/generator-registry.js +280 -0
  14. package/dist/cjs/core/index.d.ts +4 -0
  15. package/dist/cjs/core/index.js +11 -0
  16. package/dist/cjs/core/ingestion.js +3 -0
  17. package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
  18. package/dist/cjs/core/knowledge-base-manager.js +256 -0
  19. package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
  20. package/dist/cjs/core/lazy-dependency-loader.js +111 -2
  21. package/dist/cjs/core/prompt-templates.d.ts +138 -0
  22. package/dist/cjs/core/prompt-templates.js +225 -0
  23. package/dist/cjs/core/response-generator.d.ts +132 -0
  24. package/dist/cjs/core/response-generator.js +69 -0
  25. package/dist/cjs/core/search-pipeline.js +1 -1
  26. package/dist/cjs/core/search.d.ts +72 -1
  27. package/dist/cjs/core/search.js +80 -7
  28. package/dist/cjs/core/types.d.ts +1 -0
  29. package/dist/cjs/core/vector-index-messages.d.ts +52 -0
  30. package/dist/cjs/core/vector-index-messages.js +5 -0
  31. package/dist/cjs/core/vector-index-worker.d.ts +6 -0
  32. package/dist/cjs/core/vector-index-worker.js +314 -0
  33. package/dist/cjs/core/vector-index.d.ts +45 -10
  34. package/dist/cjs/core/vector-index.js +279 -218
  35. package/dist/cjs/factories/generator-factory.d.ts +88 -0
  36. package/dist/cjs/factories/generator-factory.js +151 -0
  37. package/dist/cjs/factories/index.d.ts +1 -0
  38. package/dist/cjs/factories/index.js +5 -0
  39. package/dist/cjs/factories/ingestion-factory.js +3 -7
  40. package/dist/cjs/factories/search-factory.js +11 -0
  41. package/dist/cjs/index-manager.d.ts +23 -3
  42. package/dist/cjs/index-manager.js +84 -15
  43. package/dist/cjs/index.d.ts +11 -1
  44. package/dist/cjs/index.js +19 -1
  45. package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
  46. package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
  47. package/dist/cjs/text/generators/index.d.ts +10 -0
  48. package/dist/cjs/text/generators/index.js +10 -0
  49. package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
  50. package/dist/cjs/text/generators/instruct-generator.js +192 -0
  51. package/dist/esm/cli/indexer.js +73 -15
  52. package/dist/esm/cli/search.js +77 -2
  53. package/dist/esm/cli/ui-server.d.ts +5 -0
  54. package/dist/esm/cli/ui-server.js +152 -0
  55. package/dist/esm/cli.js +53 -7
  56. package/dist/esm/core/abstract-generator.d.ts +97 -0
  57. package/dist/esm/core/abstract-generator.js +222 -0
  58. package/dist/esm/core/binary-index-format.js +53 -10
  59. package/dist/esm/core/db.d.ts +56 -0
  60. package/dist/esm/core/db.js +105 -0
  61. package/dist/esm/core/generator-registry.d.ts +114 -0
  62. package/dist/esm/core/generator-registry.js +280 -0
  63. package/dist/esm/core/index.d.ts +4 -0
  64. package/dist/esm/core/index.js +11 -0
  65. package/dist/esm/core/ingestion.js +3 -0
  66. package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
  67. package/dist/esm/core/knowledge-base-manager.js +256 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +111 -2
  70. package/dist/esm/core/prompt-templates.d.ts +138 -0
  71. package/dist/esm/core/prompt-templates.js +225 -0
  72. package/dist/esm/core/response-generator.d.ts +132 -0
  73. package/dist/esm/core/response-generator.js +69 -0
  74. package/dist/esm/core/search-pipeline.js +1 -1
  75. package/dist/esm/core/search.d.ts +72 -1
  76. package/dist/esm/core/search.js +80 -7
  77. package/dist/esm/core/types.d.ts +1 -0
  78. package/dist/esm/core/vector-index-messages.d.ts +52 -0
  79. package/dist/esm/core/vector-index-messages.js +5 -0
  80. package/dist/esm/core/vector-index-worker.d.ts +6 -0
  81. package/dist/esm/core/vector-index-worker.js +314 -0
  82. package/dist/esm/core/vector-index.d.ts +45 -10
  83. package/dist/esm/core/vector-index.js +279 -218
  84. package/dist/esm/factories/generator-factory.d.ts +88 -0
  85. package/dist/esm/factories/generator-factory.js +151 -0
  86. package/dist/esm/factories/index.d.ts +1 -0
  87. package/dist/esm/factories/index.js +5 -0
  88. package/dist/esm/factories/ingestion-factory.js +3 -7
  89. package/dist/esm/factories/search-factory.js +11 -0
  90. package/dist/esm/index-manager.d.ts +23 -3
  91. package/dist/esm/index-manager.js +84 -15
  92. package/dist/esm/index.d.ts +11 -1
  93. package/dist/esm/index.js +19 -1
  94. package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
  95. package/dist/esm/text/generators/causal-lm-generator.js +197 -0
  96. package/dist/esm/text/generators/index.d.ts +10 -0
  97. package/dist/esm/text/generators/index.js +10 -0
  98. package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
  99. package/dist/esm/text/generators/instruct-generator.js +192 -0
  100. package/package.json +14 -7
@@ -0,0 +1,256 @@
1
+ /**
2
+ * Knowledge Base Manager
3
+ *
4
+ * Provides a unified API for managing the knowledge base (database + vector index).
5
+ * This module is designed to solve file locking issues on Windows by using
6
+ * in-place reset operations instead of file deletion.
7
+ *
8
+ * Key Features:
9
+ * - Reset database and index without file deletion (avoids EBUSY/EACCES errors)
10
+ * - Coordinated reset of both database and index in a single operation
11
+ * - Connection management to prevent orphaned handles
12
+ * - Cross-platform compatibility (especially Windows)
13
+ *
14
+ * @module knowledge-base-manager
15
+ */
16
+ import { openDatabase, resetDatabase, hasDatabaseData } from './db.js';
17
+ import { IndexManager } from '../index-manager.js';
18
+ import { DatabaseConnectionManager } from './database-connection-manager.js';
19
+ import { getModelDefaults, config } from './config.js';
20
+ import { existsSync } from 'fs';
21
+ /**
22
+ * Knowledge Base Manager
23
+ *
24
+ * Manages the complete knowledge base lifecycle including database and vector index.
25
+ * Provides safe reset operations that avoid file locking issues on Windows.
26
+ *
27
+ * @example
28
+ * ```typescript
29
+ * // Reset knowledge base for force rebuild
30
+ * const result = await KnowledgeBaseManager.reset('./db.sqlite', './index.bin');
31
+ * console.log(`Reset ${result.database.documentsDeleted} documents and ${result.index.vectorsCleared} vectors`);
32
+ *
33
+ * // Reset with options
34
+ * const result = await KnowledgeBaseManager.reset('./db.sqlite', './index.bin', {
35
+ * preserveSystemInfo: true, // Keep mode/model configuration
36
+ * modelName: 'all-MiniLM-L6-v2' // Specify model for index
37
+ * });
38
+ * ```
39
+ */
40
export class KnowledgeBaseManager {
    /**
     * Reset the knowledge base by clearing its data while leaving both files in place.
     *
     * Steps performed, in order:
     * 1. Force-close any connection DatabaseConnectionManager holds for `dbPath`
     *    (failures here are recorded as warnings, not thrown).
     * 2. Open a fresh database connection and delegate to `resetDatabase`,
     *    forwarding `preserveSystemInfo` and `runVacuum`.
     * 3. If the index file exists, load it, record its vector count, and reset it.
     *    If loading fails with a dimension-mismatch error, the index is recreated
     *    from scratch and saved instead.
     *
     * @param dbPath - Path to the SQLite database file
     * @param indexPath - Path to the vector index file
     * @param options - Reset options
     * @param options.preserveSystemInfo - Forwarded to `resetDatabase`
     * @param options.runVacuum - Forwarded to `resetDatabase`
     * @param options.modelName - Embedding model used to size the index;
     *   falls back to `config.embedding_model` when falsy
     * @returns Promise resolving to `{ success, database, index, totalTimeMs, warnings }`.
     *   `index.vectorsCleared` is `-1` when the previous count is unknown because
     *   the old index could not be loaded and was recreated.
     *
     * @throws Error if database or index reset fails
     */
    static async reset(dbPath, indexPath, options = {}) {
        const startTime = Date.now();
        const warnings = [];
        console.log('🔄 Starting knowledge base reset...');
        console.log(` Database: ${dbPath}`);
        console.log(` Index: ${indexPath}`);
        // Step 1: Close any existing managed connections to prevent conflicts
        console.log('\n📡 Step 1: Closing existing connections...');
        try {
            if (DatabaseConnectionManager.hasConnection(dbPath)) {
                await DatabaseConnectionManager.forceCloseConnection(dbPath);
                console.log(' ✓ Closed existing database connection');
            }
            else {
                console.log(' ✓ No existing connection to close');
            }
        }
        catch (error) {
            // Best effort: a failed close is reported but does not abort the reset
            const warning = `Warning: Error closing existing connection: ${error instanceof Error ? error.message : 'Unknown error'}`;
            warnings.push(warning);
            console.warn(` ⚠️ ${warning}`);
        }
        // Small delay to ensure handles are fully released
        await new Promise(resolve => setTimeout(resolve, 50));
        // Step 2: Reset the database
        console.log('\n💾 Step 2: Resetting database...');
        let db = null;
        let dbResetResult;
        try {
            // Open a fresh connection
            db = await openDatabase(dbPath);
            // Perform the reset
            dbResetResult = await resetDatabase(db, {
                preserveSystemInfo: options.preserveSystemInfo,
                runVacuum: options.runVacuum
            });
            console.log(' ✓ Database reset complete');
        }
        catch (error) {
            console.error(' ❌ Database reset failed:', error);
            throw new Error(`Failed to reset database: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
        finally {
            // Always release the connection opened above, even when the reset failed
            if (db) {
                try {
                    await db.close();
                }
                catch (closeError) {
                    warnings.push(`Warning: Error closing database after reset: ${closeError}`);
                }
            }
        }
        // Step 3: Reset the vector index
        console.log('\n📇 Step 3: Resetting vector index...');
        let indexResetResult;
        const indexStartTime = Date.now();
        try {
            // Determine model and dimensions
            const modelName = options.modelName || config.embedding_model;
            const modelDefaults = getModelDefaults(modelName);
            // Check if index file exists
            if (!existsSync(indexPath)) {
                console.log(' Index file does not exist, will be created during ingestion');
                indexResetResult = {
                    vectorsCleared: 0,
                    resetTimeMs: Date.now() - indexStartTime
                };
            }
            else {
                // The user might be switching models (e.g., from MPNet 768D to MiniLM 384D),
                // so a dimension mismatch while loading the old index is handled below.
                const indexManager = new IndexManager(indexPath, dbPath, modelDefaults.dimensions, modelName);
                let previousVectorCount = 0;
                try {
                    // skipModelCheck=true, forceRecreate=false: load the existing index
                    // so the vector count can be reported
                    await indexManager.initialize(true, false);
                    // Get current vector count before reset
                    previousVectorCount = (await indexManager.hasVectors()) ?
                        (await indexManager.getStats()).totalVectors : 0;
                    // Perform the reset
                    await indexManager.reset();
                }
                catch (initError) {
                    const errorMessage = initError?.message || String(initError);
                    const isDimensionMismatch = errorMessage.includes('dimension mismatch') || errorMessage.includes('Vector dimension');
                    if (!isDimensionMismatch) {
                        // Release the index handle before propagating; otherwise the
                        // failed manager would keep the file open after this call throws.
                        await closeIndexManagerQuietly(indexManager);
                        throw initError;
                    }
                    console.log(' ⚠️ Dimension mismatch detected - forcing index recreation');
                    console.log(' (This is expected when switching embedding models)');
                    // Create a fresh IndexManager and force recreate
                    const freshIndexManager = new IndexManager(indexPath, dbPath, modelDefaults.dimensions, modelName);
                    try {
                        await freshIndexManager.initialize(true, true); // skipModelCheck=true, forceRecreate=true
                        await freshIndexManager.saveIndex();
                    }
                    catch (recreateError) {
                        // Do not leave either manager open when recreation fails
                        await closeIndexManagerQuietly(freshIndexManager);
                        await closeIndexManagerQuietly(indexManager);
                        throw recreateError;
                    }
                    await freshIndexManager.close();
                    // The old index could not be loaded, so its size is unknown
                    previousVectorCount = -1; // Indicate unknown
                }
                // Close the index manager
                await indexManager.close();
                indexResetResult = {
                    vectorsCleared: previousVectorCount,
                    resetTimeMs: Date.now() - indexStartTime
                };
                console.log(' ✓ Index reset complete');
            }
        }
        catch (error) {
            console.error(' ❌ Index reset failed:', error);
            throw new Error(`Failed to reset index: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
        const totalTimeMs = Date.now() - startTime;
        // Summary
        console.log('\n✅ Knowledge base reset complete!');
        console.log(` Total time: ${totalTimeMs}ms`);
        console.log(` Documents deleted: ${dbResetResult.documentsDeleted}`);
        console.log(` Chunks deleted: ${dbResetResult.chunksDeleted}`);
        console.log(` Vectors cleared: ${indexResetResult.vectorsCleared === -1 ? '(unknown - index recreated due to model change)' : indexResetResult.vectorsCleared}`);
        if (warnings.length > 0) {
            console.log(` Warnings: ${warnings.length}`);
        }
        return {
            success: true,
            database: dbResetResult,
            index: indexResetResult,
            totalTimeMs,
            warnings
        };
    }
    /**
     * Check if the knowledge base has any data.
     *
     * Opens its own connection, asks `hasDatabaseData`, and closes the connection
     * again. Any failure (including being unable to open the database) is
     * deliberately reported as "no data".
     *
     * @param dbPath - Path to the SQLite database file
     * @returns Promise resolving to true if database has data, false if empty or unreadable
     */
    static async hasData(dbPath) {
        let db = null;
        try {
            db = await openDatabase(dbPath);
            return await hasDatabaseData(db);
        }
        catch {
            // If we can't open or query the database, assume no data
            return false;
        }
        finally {
            if (db) {
                try {
                    await db.close();
                }
                catch {
                    // Ignore close errors
                }
            }
        }
    }
    /**
     * Close all managed connections to the knowledge base.
     * Useful before operations that might conflict with open handles.
     * Errors are logged as warnings and never thrown.
     *
     * @param dbPath - Path to the SQLite database file
     */
    static async closeAllConnections(dbPath) {
        console.log('🔒 Closing all knowledge base connections...');
        try {
            if (DatabaseConnectionManager.hasConnection(dbPath)) {
                await DatabaseConnectionManager.forceCloseConnection(dbPath);
            }
            // Also close connections registered under the WAL/SHM sidecar paths, if any.
            // NOTE(review): presumably the manager is keyed by database path, in which
            // case these lookups never match - confirm against DatabaseConnectionManager.
            const sidecars = [`${dbPath}-wal`, `${dbPath}-shm`];
            for (const sidecar of sidecars) {
                if (DatabaseConnectionManager.hasConnection(sidecar)) {
                    await DatabaseConnectionManager.forceCloseConnection(sidecar);
                }
            }
            console.log('✓ All connections closed');
        }
        catch (error) {
            console.warn('⚠️ Error closing connections:', error);
        }
    }
}
/**
 * Close an index manager on an error path, swallowing any close failure so
 * the original error is the one that propagates to the caller.
 */
async function closeIndexManagerQuietly(manager) {
    try {
        await manager.close();
    }
    catch {
        // Ignored: the error that triggered this cleanup is more useful
    }
}
256
+ //# sourceMappingURL=knowledge-base-manager.js.map
@@ -8,6 +8,7 @@
8
8
  import '../dom-polyfills.js';
9
9
  import type { UniversalEmbedder } from './universal-embedder.js';
10
10
  import type { RerankFunction } from './interfaces.js';
11
+ import type { ResponseGenerator } from './response-generator.js';
11
12
  /**
12
13
  * Lazy loader for embedder implementations
13
14
  * Only loads the specific embedder type when needed
@@ -42,6 +43,42 @@ export declare class LazyEmbedderLoader {
42
43
  multimodalEmbedders: number;
43
44
  };
44
45
  }
46
/**
 * Lazy loader for response generator implementations.
 * Each generator type is only loaded when one of the load methods is called.
 *
 * @experimental This feature is experimental and may change in future versions.
 */
export declare class LazyGeneratorLoader {
    /** Cache backing all load/lookup methods of this class. */
    private static cache;
    /**
     * Lazily load and create an instruct generator (e.g. SmolLM2-Instruct).
     * The generator module is only imported when generation is actually requested.
     *
     * @param modelName - Name of the instruct model to load
     * @param options - Options passed to the generator (shape not declared here)
     * @returns Promise resolving to the generator
     */
    static loadInstructGenerator(modelName: string, options?: any): Promise<ResponseGenerator>;
    /**
     * Lazily load and create a causal LM generator (e.g. DistilGPT2).
     * The generator module is only imported when generation is actually requested.
     *
     * @param modelName - Name of the causal LM model to load
     * @param options - Options passed to the generator (shape not declared here)
     * @returns Promise resolving to the generator
     */
    static loadCausalLMGenerator(modelName: string, options?: any): Promise<ResponseGenerator>;
    /**
     * Check if a generator is already loaded in cache.
     *
     * @param modelName - Model name the generator was loaded with
     * @param modelType - Generator type the model was loaded as
     * @returns true when a matching generator is cached
     */
    static isGeneratorLoaded(modelName: string, modelType: 'instruct' | 'causal-lm'): boolean;
    /**
     * Remove a generator from the cache (called when generator is cleaned up).
     *
     * @param modelName - Model name the generator was loaded with
     * @param modelType - Generator type the model was loaded as
     */
    static removeGeneratorFromCache(modelName: string, modelType: 'instruct' | 'causal-lm'): void;
    /**
     * Get statistics about loaded generators.
     *
     * @returns The cache keys of loaded generators, their total, and per-type counts
     */
    static getLoadingStats(): {
        loadedGenerators: string[];
        totalLoaded: number;
        instructGenerators: number;
        causalLMGenerators: number;
    };
}
45
82
  /**
46
83
  * Lazy loader for reranking implementations
47
84
  * Only loads the specific reranker type when needed
@@ -107,6 +144,11 @@ export declare class LazyMultimodalLoader {
107
144
  * Provides a single entry point for dependency management
108
145
  */
109
146
  export declare class LazyDependencyManager {
147
/**
 * Load a response generator for the given model type with lazy loading.
 *
 * @param modelName - Name of the generator model to load
 * @param modelType - Which generator family to load: 'instruct' or 'causal-lm'
 * @param options - Options passed to the generator (shape not declared here)
 * @returns Promise resolving to the generator
 * @experimental This feature is experimental and may change in future versions.
 */
static loadGenerator(modelName: string, modelType: 'instruct' | 'causal-lm', options?: any): Promise<ResponseGenerator>;
110
152
  /**
111
153
  * Load embedder based on model type with lazy loading
112
154
  */
@@ -121,6 +163,7 @@ export declare class LazyDependencyManager {
121
163
  static getLoadingStatistics(): {
122
164
  embedders: ReturnType<typeof LazyEmbedderLoader.getLoadingStats>;
123
165
  rerankers: ReturnType<typeof LazyRerankerLoader.getLoadingStats>;
166
+ generators: ReturnType<typeof LazyGeneratorLoader.getLoadingStats>;
124
167
  multimodal: ReturnType<typeof LazyMultimodalLoader.getMultimodalLoadingStatus>;
125
168
  totalModulesLoaded: number;
126
169
  memoryImpact: 'low' | 'medium' | 'high';
@@ -149,6 +149,99 @@ export class LazyEmbedderLoader {
149
149
  }
150
150
  }
151
151
  // =============================================================================
152
+ // LAZY GENERATOR LOADING
153
+ // =============================================================================
154
/**
 * Lazy loader for response generator implementations.
 * Only imports the specific generator module when it is first requested.
 *
 * Loaded generators are stored in the shared LazyLoadingCache under keys of
 * the form `generator:<modelType>:<modelName>`.
 *
 * NOTE(review): `options` is not part of the cache key, so (assuming
 * `getOrLoad` returns the cached value for an existing key) a second call for
 * the same model with different options yields the first instance - confirm.
 *
 * @experimental This feature is experimental and may change in future versions.
 */
export class LazyGeneratorLoader {
    static cache = LazyLoadingCache.getInstance();
    /**
     * Lazily load and create an instruct generator (SmolLM2-Instruct)
     * Only imports the module when generation is actually requested
     *
     * @param modelName - Name of the instruct model
     * @param options - Options forwarded to the InstructGenerator constructor
     * @returns Promise resolving to the generator after `loadModel()` completed
     * @throws The error built by `createError.model` when import or model loading fails
     */
    static async loadInstructGenerator(modelName, options = {}) {
        const cacheKey = `generator:instruct:${modelName}`;
        // Reference the class rather than `this` so the method still works when
        // it is detached (e.g. passed as a callback).
        return LazyGeneratorLoader.cache.getOrLoad(cacheKey, async () => {
            try {
                console.log(`🔄 [EXPERIMENTAL] Lazy loading instruct generator: ${modelName}`);
                // Dynamic import - only loaded when generation is requested
                const { InstructGenerator } = await import('../text/generators/instruct-generator.js');
                const generator = new InstructGenerator(modelName, options);
                await generator.loadModel();
                console.log(`✅ Instruct generator loaded: ${modelName}`);
                return generator;
            }
            catch (error) {
                const enhancedError = createError.model(`Failed to lazy load instruct generator '${modelName}': ${error instanceof Error ? error.message : 'Unknown error'}`);
                handleError(enhancedError, 'LazyGeneratorLoader', {
                    severity: ErrorSeverity.ERROR,
                    category: ErrorCategory.MODEL
                });
                throw enhancedError;
            }
        });
    }
    /**
     * Lazily load and create a causal LM generator (DistilGPT2)
     * Only imports the module when generation is actually requested
     *
     * @param modelName - Name of the causal LM model
     * @param options - Options forwarded to the CausalLMGenerator constructor
     * @returns Promise resolving to the generator after `loadModel()` completed
     * @throws The error built by `createError.model` when import or model loading fails
     */
    static async loadCausalLMGenerator(modelName, options = {}) {
        const cacheKey = `generator:causal-lm:${modelName}`;
        return LazyGeneratorLoader.cache.getOrLoad(cacheKey, async () => {
            try {
                console.log(`🔄 [EXPERIMENTAL] Lazy loading causal LM generator: ${modelName}`);
                // Dynamic import - only loaded when generation is requested
                const { CausalLMGenerator } = await import('../text/generators/causal-lm-generator.js');
                const generator = new CausalLMGenerator(modelName, options);
                await generator.loadModel();
                console.log(`✅ Causal LM generator loaded: ${modelName}`);
                return generator;
            }
            catch (error) {
                const enhancedError = createError.model(`Failed to lazy load causal LM generator '${modelName}': ${error instanceof Error ? error.message : 'Unknown error'}`);
                handleError(enhancedError, 'LazyGeneratorLoader', {
                    severity: ErrorSeverity.ERROR,
                    category: ErrorCategory.MODEL
                });
                throw enhancedError;
            }
        });
    }
    /**
     * Check if a generator is already loaded in cache
     *
     * @param modelName - Model name the generator was loaded with
     * @param modelType - 'instruct' or 'causal-lm'
     * @returns true when the matching cache key is among the loaded modules
     */
    static isGeneratorLoaded(modelName, modelType) {
        const cacheKey = `generator:${modelType}:${modelName}`;
        return LazyGeneratorLoader.cache.getLoadedModules().includes(cacheKey);
    }
    /**
     * Remove a generator from the cache (called when generator is cleaned up)
     *
     * @param modelName - Model name the generator was loaded with
     * @param modelType - 'instruct' or 'causal-lm'
     */
    static removeGeneratorFromCache(modelName, modelType) {
        const cacheKey = `generator:${modelType}:${modelName}`;
        LazyGeneratorLoader.cache.remove(cacheKey);
        console.log(`🧹 Removed generator from cache: ${cacheKey}`);
    }
    /**
     * Get statistics about loaded generators
     *
     * @returns Loaded generator cache keys, their total, and per-type counts
     */
    static getLoadingStats() {
        const loadedModules = LazyGeneratorLoader.cache.getLoadedModules().filter(key => key.startsWith('generator:'));
        // Match on the full key prefix: a substring test would misclassify a
        // model whose name itself contains ':instruct:' or ':causal-lm:'.
        const instructGenerators = loadedModules.filter(key => key.startsWith('generator:instruct:')).length;
        const causalLMGenerators = loadedModules.filter(key => key.startsWith('generator:causal-lm:')).length;
        return {
            loadedGenerators: loadedModules,
            totalLoaded: loadedModules.length,
            instructGenerators,
            causalLMGenerators
        };
    }
}
244
+ // =============================================================================
152
245
  // LAZY RERANKER LOADING
153
246
  // =============================================================================
154
247
  /**
@@ -332,6 +425,20 @@ export class LazyMultimodalLoader {
332
425
  * Provides a single entry point for dependency management
333
426
  */
334
427
  export class LazyDependencyManager {
428
+ /**
429
+ * Load response generator based on model type with lazy loading
430
+ * @experimental This feature is experimental and may change in future versions.
431
+ */
432
+ static async loadGenerator(modelName, modelType, options = {}) {
433
+ switch (modelType) {
434
+ case 'instruct':
435
+ return LazyGeneratorLoader.loadInstructGenerator(modelName, options);
436
+ case 'causal-lm':
437
+ return LazyGeneratorLoader.loadCausalLMGenerator(modelName, options);
438
+ default:
439
+ throw createError.validation(`Unsupported generator model type for lazy loading: ${modelType}`);
440
+ }
441
+ }
335
442
  /**
336
443
  * Load embedder based on model type with lazy loading
337
444
  */
@@ -367,19 +474,21 @@ export class LazyDependencyManager {
367
474
  static getLoadingStatistics() {
368
475
  const embedderStats = LazyEmbedderLoader.getLoadingStats();
369
476
  const rerankerStats = LazyRerankerLoader.getLoadingStats();
477
+ const generatorStats = LazyGeneratorLoader.getLoadingStats();
370
478
  const multimodalStats = LazyMultimodalLoader.getMultimodalLoadingStatus();
371
- const totalModules = embedderStats.totalLoaded + rerankerStats.totalLoaded + multimodalStats.loadedProcessors.length;
479
+ const totalModules = embedderStats.totalLoaded + rerankerStats.totalLoaded + generatorStats.totalLoaded + multimodalStats.loadedProcessors.length;
372
480
  // Estimate memory impact based on loaded modules
373
481
  let memoryImpact = 'low';
374
482
  if (embedderStats.multimodalEmbedders > 0 || multimodalStats.imageToTextLoaded) {
375
483
  memoryImpact = 'high';
376
484
  }
377
- else if (totalModules > 2) {
485
+ else if (totalModules > 2 || generatorStats.totalLoaded > 0) {
378
486
  memoryImpact = 'medium';
379
487
  }
380
488
  return {
381
489
  embedders: embedderStats,
382
490
  rerankers: rerankerStats,
491
+ generators: generatorStats,
383
492
  multimodal: multimodalStats,
384
493
  totalModulesLoaded: totalModules,
385
494
  memoryImpact
@@ -0,0 +1,138 @@
1
+ /**
2
+ * CORE MODULE — Prompt Templates for RAG Response Generation
3
+ *
4
+ * Provides prompt engineering utilities for different generator model types.
5
+ * Handles context formatting, token budget management, and system prompts.
6
+ *
7
+ * PROMPT STRATEGIES:
8
+ * - Instruct models: Use chat template with system/user/assistant roles
9
+ * - Causal LM models: Use simple document + question format
10
+ *
11
+ * @experimental This feature is experimental and may change in future versions.
12
+ */
13
+ import type { SearchResult } from './types.js';
14
+ import type { GeneratorModelType } from './response-generator.js';
15
/**
 * Default system prompt for instruct models.
 *
 * Instructs the model to answer only from the provided context documents, to
 * say so explicitly when the answer is not in the context, to avoid external
 * knowledge, to stay concise, and to acknowledge incomplete or unclear context.
 */
export declare const DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n3. Do not make up information or use external knowledge\n4. Be concise and direct in your response\n5. If the context is incomplete or unclear, acknowledge this limitation";
20
/**
 * Default system prompt for RAG with source attribution.
 *
 * Same grounding rules as the plain default prompt, but additionally asks the
 * model to mention which document the information comes from when possible
 * (and omits the rule about acknowledging incomplete context).
 */
export declare const DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. When possible, mention which document the information comes from\n3. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n4. Do not make up information or use external knowledge\n5. Be concise and direct in your response";
24
/**
 * SmolLM2 chat template format.
 * Uses <|im_start|> and <|im_end|> tokens.
 *
 * Each field holds the literal marker text for one part of the chat layout:
 * the opening and closing markers for the system, user, and assistant turns,
 * plus the end-of-text marker.
 */
export declare const SMOLLM2_CHAT_TEMPLATE: {
    systemStart: string;
    systemEnd: string;
    userStart: string;
    userEnd: string;
    assistantStart: string;
    assistantEnd: string;
    endOfText: string;
};
37
/**
 * Options controlling how search result chunks are formatted into prompt context.
 */
export interface ContextFormattingOptions {
    /** Maximum number of tokens the formatted context may use */
    maxContextTokens: number;
    /** Whether to include document titles/sources alongside each chunk */
    includeDocumentInfo?: boolean;
    /** Whether to include relevance scores alongside each chunk */
    includeScores?: boolean;
    /** Separator string placed between chunks */
    chunkSeparator?: string;
    /**
     * Ratio used to estimate tokens from character counts (a number, not a function).
     * NOTE(review): presumably characters per token - confirm against the implementation.
     */
    tokenEstimationRatio?: number;
}
52
/**
 * Result of formatting search result chunks into a context string.
 */
export interface FormattedContext {
    /** The formatted context string */
    text: string;
    /** Estimated token count of the context (an estimate, not an exact count) */
    estimatedTokens: number;
    /** Number of chunks that were included in the context */
    chunksIncluded: number;
    /** Total number of chunks that were available */
    totalChunks: number;
    /** Whether the context was truncated */
    truncated: boolean;
}
67
/**
 * Format search result chunks into a context string for the prompt.
 *
 * @param chunks - Search result chunks to format
 * @param options - Formatting options, including the token limit for the context
 * @returns The formatted context text together with metadata
 *   (estimated tokens, chunk counts, truncation flag)
 */
export declare function formatContextChunks(chunks: SearchResult[], options: ContextFormattingOptions): FormattedContext;
75
/**
 * Options for building the complete prompt sent to a generator model.
 */
export interface PromptBuildOptions {
    /** The user's query */
    query: string;
    /** Search result chunks to use as context */
    chunks: SearchResult[];
    /** Generator model type the prompt is built for */
    modelType: GeneratorModelType;
    /** Custom system prompt (optional) */
    systemPrompt?: string;
    /** Maximum context window size, in tokens */
    maxContextLength: number;
    /** Number of tokens reserved for the generated output */
    reservedOutputTokens: number;
    /** Whether to include a source attribution hint */
    includeSourceAttribution?: boolean;
}
94
/**
 * Result of building a prompt.
 */
export interface BuiltPrompt {
    /** The complete prompt string */
    prompt: string;
    /** Estimated total token count of the prompt (an estimate, not an exact count) */
    estimatedTokens: number;
    /** Metadata about the context that was formatted into the prompt */
    contextInfo: FormattedContext;
    /** The system prompt that was used, if any */
    systemPromptUsed?: string;
}
107
/**
 * Build a complete prompt for the generator model.
 *
 * @param options - Prompt building options (query, chunks, model type, token limits)
 * @returns The built prompt string together with its metadata
 */
export declare function buildPrompt(options: PromptBuildOptions): BuiltPrompt;
114
/**
 * Estimate the token count for a string.
 * Uses a simple character-based heuristic (~4 chars per token for English),
 * so the result is an approximation rather than a tokenizer-exact count.
 *
 * @param text - Text to estimate tokens for
 * @returns Estimated token count
 */
export declare function estimateTokenCount(text: string): number;
122
/**
 * Calculate the token budget available for context chunks.
 *
 * @param maxContextLength - Maximum context window size
 * @param reservedOutputTokens - Tokens reserved for generation
 * @param promptOverhead - Tokens used by prompt formatting (optional)
 * @returns Available tokens for context chunks
 */
export declare function calculateContextBudget(maxContextLength: number, reservedOutputTokens: number, promptOverhead?: number): number;
131
/**
 * Get the default stop sequences for a model type.
 *
 * @param modelType - Generator model type
 * @returns Array of stop sequence strings for that model type
 */
export declare function getDefaultStopSequences(modelType: GeneratorModelType): string[];
138
+ //# sourceMappingURL=prompt-templates.d.ts.map