rag-lite-ts 2.2.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -5
- package/dist/cjs/cli/indexer.js +73 -15
- package/dist/cjs/cli/search.js +77 -2
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/cjs/cli.js +53 -7
- package/dist/cjs/core/abstract-generator.d.ts +97 -0
- package/dist/cjs/core/abstract-generator.js +222 -0
- package/dist/cjs/core/binary-index-format.js +53 -10
- package/dist/cjs/core/db.d.ts +56 -0
- package/dist/cjs/core/db.js +105 -0
- package/dist/cjs/core/generator-registry.d.ts +114 -0
- package/dist/cjs/core/generator-registry.js +280 -0
- package/dist/cjs/core/index.d.ts +4 -0
- package/dist/cjs/core/index.js +11 -0
- package/dist/cjs/core/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/cjs/core/lazy-dependency-loader.js +111 -2
- package/dist/cjs/core/prompt-templates.d.ts +138 -0
- package/dist/cjs/core/prompt-templates.js +225 -0
- package/dist/cjs/core/response-generator.d.ts +132 -0
- package/dist/cjs/core/response-generator.js +69 -0
- package/dist/cjs/core/search-pipeline.js +1 -1
- package/dist/cjs/core/search.d.ts +72 -1
- package/dist/cjs/core/search.js +80 -7
- package/dist/cjs/core/types.d.ts +1 -0
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +314 -0
- package/dist/cjs/core/vector-index.d.ts +45 -10
- package/dist/cjs/core/vector-index.js +279 -218
- package/dist/cjs/factories/generator-factory.d.ts +88 -0
- package/dist/cjs/factories/generator-factory.js +151 -0
- package/dist/cjs/factories/index.d.ts +1 -0
- package/dist/cjs/factories/index.js +5 -0
- package/dist/cjs/factories/ingestion-factory.js +3 -7
- package/dist/cjs/factories/search-factory.js +11 -0
- package/dist/cjs/index-manager.d.ts +23 -3
- package/dist/cjs/index-manager.js +84 -15
- package/dist/cjs/index.d.ts +11 -1
- package/dist/cjs/index.js +19 -1
- package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
- package/dist/cjs/text/generators/index.d.ts +10 -0
- package/dist/cjs/text/generators/index.js +10 -0
- package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
- package/dist/cjs/text/generators/instruct-generator.js +192 -0
- package/dist/esm/cli/indexer.js +73 -15
- package/dist/esm/cli/search.js +77 -2
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.js +53 -7
- package/dist/esm/core/abstract-generator.d.ts +97 -0
- package/dist/esm/core/abstract-generator.js +222 -0
- package/dist/esm/core/binary-index-format.js +53 -10
- package/dist/esm/core/db.d.ts +56 -0
- package/dist/esm/core/db.js +105 -0
- package/dist/esm/core/generator-registry.d.ts +114 -0
- package/dist/esm/core/generator-registry.js +280 -0
- package/dist/esm/core/index.d.ts +4 -0
- package/dist/esm/core/index.js +11 -0
- package/dist/esm/core/ingestion.js +3 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/esm/core/lazy-dependency-loader.js +111 -2
- package/dist/esm/core/prompt-templates.d.ts +138 -0
- package/dist/esm/core/prompt-templates.js +225 -0
- package/dist/esm/core/response-generator.d.ts +132 -0
- package/dist/esm/core/response-generator.js +69 -0
- package/dist/esm/core/search-pipeline.js +1 -1
- package/dist/esm/core/search.d.ts +72 -1
- package/dist/esm/core/search.js +80 -7
- package/dist/esm/core/types.d.ts +1 -0
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +314 -0
- package/dist/esm/core/vector-index.d.ts +45 -10
- package/dist/esm/core/vector-index.js +279 -218
- package/dist/esm/factories/generator-factory.d.ts +88 -0
- package/dist/esm/factories/generator-factory.js +151 -0
- package/dist/esm/factories/index.d.ts +1 -0
- package/dist/esm/factories/index.js +5 -0
- package/dist/esm/factories/ingestion-factory.js +3 -7
- package/dist/esm/factories/search-factory.js +11 -0
- package/dist/esm/index-manager.d.ts +23 -3
- package/dist/esm/index-manager.js +84 -15
- package/dist/esm/index.d.ts +11 -1
- package/dist/esm/index.js +19 -1
- package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/esm/text/generators/causal-lm-generator.js +197 -0
- package/dist/esm/text/generators/index.d.ts +10 -0
- package/dist/esm/text/generators/index.js +10 -0
- package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
- package/dist/esm/text/generators/instruct-generator.js +192 -0
- package/package.json +14 -7
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge Base Manager
|
|
3
|
+
*
|
|
4
|
+
* Provides a unified API for managing the knowledge base (database + vector index).
|
|
5
|
+
* This module is designed to solve file locking issues on Windows by using
|
|
6
|
+
* in-place reset operations instead of file deletion.
|
|
7
|
+
*
|
|
8
|
+
* Key Features:
|
|
9
|
+
* - Reset database and index without file deletion (avoids EBUSY/EACCES errors)
|
|
10
|
+
* - Coordinated reset of both database and index in a single operation
|
|
11
|
+
* - Connection management to prevent orphaned handles
|
|
12
|
+
* - Cross-platform compatibility (especially Windows)
|
|
13
|
+
*
|
|
14
|
+
* @module knowledge-base-manager
|
|
15
|
+
*/
|
|
16
|
+
import { openDatabase, resetDatabase, hasDatabaseData } from './db.js';
|
|
17
|
+
import { IndexManager } from '../index-manager.js';
|
|
18
|
+
import { DatabaseConnectionManager } from './database-connection-manager.js';
|
|
19
|
+
import { getModelDefaults, config } from './config.js';
|
|
20
|
+
import { existsSync } from 'fs';
|
|
21
|
+
/**
 * Close an index manager while an earlier error is already propagating.
 *
 * A failure to close is only logged, so that it never replaces the original
 * error the caller is about to rethrow.
 *
 * @param indexManager - The IndexManager instance to release
 */
async function closeIndexManagerAfterFailure(indexManager) {
    try {
        await indexManager.close();
    }
    catch (closeError) {
        console.warn(` ⚠️ Error closing index manager after failure: ${closeError instanceof Error ? closeError.message : String(closeError)}`);
    }
}
/**
 * Knowledge Base Manager
 *
 * Manages the complete knowledge base lifecycle including database and vector index.
 * Provides safe reset operations that avoid file locking issues on Windows.
 *
 * @example
 * ```typescript
 * // Reset knowledge base for force rebuild
 * const result = await KnowledgeBaseManager.reset('./db.sqlite', './index.bin');
 * console.log(`Reset ${result.database.documentsDeleted} documents and ${result.index.vectorsCleared} vectors`);
 *
 * // Reset with options
 * const resultWithOptions = await KnowledgeBaseManager.reset('./db.sqlite', './index.bin', {
 *   preserveSystemInfo: true, // Keep mode/model configuration
 *   modelName: 'all-MiniLM-L6-v2' // Specify model for index
 * });
 * ```
 */
export class KnowledgeBaseManager {
    /**
     * Reset the knowledge base by clearing all data while keeping files intact.
     * This is a safer alternative to file deletion that avoids file locking issues on Windows.
     *
     * The reset operation:
     * 1. Force-closes any connection DatabaseConnectionManager holds for `dbPath`
     *    (a failure here is recorded as a warning, not thrown)
     * 2. Waits 50 ms, then opens a fresh connection to the database
     * 3. Delegates the data reset to `resetDatabase`, forwarding
     *    `preserveSystemInfo` and `runVacuum`
     * 4. Closes that connection (a close failure is recorded as a warning)
     * 5. If the index file exists, initializes an IndexManager and resets it; if that
     *    fails with a dimension-mismatch message, a fresh IndexManager is force-recreated
     *    and saved instead
     * 6. Closes every IndexManager it created, including on failure paths
     *
     * If the index file does not exist, nothing is written and `vectorsCleared` is 0.
     * When the index had to be recreated, `vectorsCleared` is -1 (previous count unknown).
     *
     * @param dbPath - Path to the SQLite database file
     * @param indexPath - Path to the vector index file
     * @param options - Reset options (`preserveSystemInfo`, `runVacuum`, `modelName`);
     *   `modelName` falls back to `config.embedding_model` when falsy
     * @returns Promise resolving to `{ success, database, index, totalTimeMs, warnings }`
     *
     * @throws Error with a "Failed to reset database: ..." or "Failed to reset index: ..."
     *   message if the corresponding step fails
     */
    static async reset(dbPath, indexPath, options = {}) {
        const startTime = Date.now();
        const warnings = [];
        console.log('🔄 Starting knowledge base reset...');
        console.log(` Database: ${dbPath}`);
        console.log(` Index: ${indexPath}`);
        // Step 1: Close any existing managed connections to prevent conflicts
        console.log('\n📡 Step 1: Closing existing connections...');
        try {
            if (DatabaseConnectionManager.hasConnection(dbPath)) {
                await DatabaseConnectionManager.forceCloseConnection(dbPath);
                console.log(' ✓ Closed existing database connection');
            }
            else {
                console.log(' ✓ No existing connection to close');
            }
        }
        catch (error) {
            // Best effort: a stale connection should not abort the reset
            const warning = `Warning: Error closing existing connection: ${error instanceof Error ? error.message : 'Unknown error'}`;
            warnings.push(warning);
            console.warn(` ⚠️ ${warning}`);
        }
        // Small delay to ensure handles are fully released
        await new Promise(resolve => setTimeout(resolve, 50));
        // Step 2: Reset the database
        console.log('\n💾 Step 2: Resetting database...');
        let db = null;
        let dbResetResult;
        try {
            // Open a fresh connection
            db = await openDatabase(dbPath);
            // Perform the reset
            dbResetResult = await resetDatabase(db, {
                preserveSystemInfo: options.preserveSystemInfo,
                runVacuum: options.runVacuum
            });
            console.log(' ✓ Database reset complete');
        }
        catch (error) {
            console.error(' ❌ Database reset failed:', error);
            throw new Error(`Failed to reset database: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
        finally {
            // Close the database connection whether or not the reset succeeded
            if (db) {
                try {
                    await db.close();
                }
                catch (closeError) {
                    warnings.push(`Warning: Error closing database after reset: ${closeError}`);
                }
            }
        }
        // Step 3: Reset the vector index
        console.log('\n📇 Step 3: Resetting vector index...');
        let indexResetResult;
        const indexStartTime = Date.now();
        try {
            // Determine model and dimensions
            const modelName = options.modelName || config.embedding_model;
            const modelDefaults = getModelDefaults(modelName);
            // Check if index file exists
            if (!existsSync(indexPath)) {
                console.log(' Index file does not exist, will be created during ingestion');
                indexResetResult = {
                    vectorsCleared: 0,
                    resetTimeMs: Date.now() - indexStartTime
                };
            }
            else {
                // Create IndexManager and reset.
                // We need to handle dimension mismatch gracefully since the user might be
                // switching models (e.g., from MPNet 768D to MiniLM 384D)
                const indexManager = new IndexManager(indexPath, dbPath, modelDefaults.dimensions, modelName);
                let previousVectorCount = 0;
                try {
                    try {
                        // initialize(skipModelCheck=true, forceRecreate=false): load the
                        // existing index first so the vector count can be reported
                        await indexManager.initialize(true, false);
                        // Get current vector count before reset
                        previousVectorCount = (await indexManager.hasVectors())
                            ? (await indexManager.getStats()).totalVectors
                            : 0;
                        // Perform the reset
                        await indexManager.reset();
                    }
                    catch (initError) {
                        // If initialization failed (e.g., dimension mismatch), force recreate the index.
                        // This handles the case where user is switching models
                        const errorMessage = initError?.message || String(initError);
                        const isDimensionMismatch = errorMessage.includes('dimension mismatch') || errorMessage.includes('Vector dimension');
                        if (!isDimensionMismatch) {
                            // Re-throw other errors
                            throw initError;
                        }
                        console.log(' ⚠️ Dimension mismatch detected - forcing index recreation');
                        console.log(' (This is expected when switching embedding models)');
                        // Create a fresh IndexManager and force recreate
                        const freshIndexManager = new IndexManager(indexPath, dbPath, modelDefaults.dimensions, modelName);
                        try {
                            await freshIndexManager.initialize(true, true); // skipModelCheck=true, forceRecreate=true
                            await freshIndexManager.saveIndex();
                        }
                        catch (recreateError) {
                            // Release the fresh manager before surfacing the failure
                            await closeIndexManagerAfterFailure(freshIndexManager);
                            throw recreateError;
                        }
                        await freshIndexManager.close();
                        // The old index could not be loaded, so its vector count is unknown
                        previousVectorCount = -1; // Indicate unknown
                    }
                }
                catch (resetError) {
                    // Previously the manager was left open on this path; release it
                    // so a failed reset does not keep the index file locked
                    await closeIndexManagerAfterFailure(indexManager);
                    throw resetError;
                }
                // Close the index manager (errors here still fail the reset, as before)
                await indexManager.close();
                indexResetResult = {
                    vectorsCleared: previousVectorCount,
                    resetTimeMs: Date.now() - indexStartTime
                };
                console.log(' ✓ Index reset complete');
            }
        }
        catch (error) {
            console.error(' ❌ Index reset failed:', error);
            throw new Error(`Failed to reset index: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
        const totalTimeMs = Date.now() - startTime;
        // Summary
        console.log('\n✅ Knowledge base reset complete!');
        console.log(` Total time: ${totalTimeMs}ms`);
        console.log(` Documents deleted: ${dbResetResult.documentsDeleted}`);
        console.log(` Chunks deleted: ${dbResetResult.chunksDeleted}`);
        console.log(` Vectors cleared: ${indexResetResult.vectorsCleared === -1 ? '(unknown - index recreated due to model change)' : indexResetResult.vectorsCleared}`);
        if (warnings.length > 0) {
            console.log(` Warnings: ${warnings.length}`);
        }
        return {
            success: true,
            database: dbResetResult,
            index: indexResetResult,
            totalTimeMs,
            warnings
        };
    }
    /**
     * Check if the knowledge base has any data
     *
     * Opens its own connection, asks `hasDatabaseData`, and always closes the
     * connection again (close errors are ignored).
     *
     * @param dbPath - Path to the SQLite database file
     * @returns Promise resolving to true if database has data; false if it is empty
     *   or if opening/querying the database throws
     */
    static async hasData(dbPath) {
        let db = null;
        try {
            db = await openDatabase(dbPath);
            return await hasDatabaseData(db);
        }
        catch (error) {
            // If we can't open (or query) the database, assume no data
            return false;
        }
        finally {
            if (db) {
                try {
                    await db.close();
                }
                catch {
                    // Ignore close errors
                }
            }
        }
    }
    /**
     * Close all connections to the knowledge base
     * Useful before operations that might conflict with open handles
     *
     * Force-closes the managed connection for `dbPath` and for the `-wal` / `-shm`
     * sidecar paths when DatabaseConnectionManager reports one. Errors are logged
     * as a warning and never thrown.
     *
     * @param dbPath - Path to the SQLite database file
     */
    static async closeAllConnections(dbPath) {
        console.log('🔒 Closing all knowledge base connections...');
        try {
            if (DatabaseConnectionManager.hasConnection(dbPath)) {
                await DatabaseConnectionManager.forceCloseConnection(dbPath);
            }
            // Also close WAL/SHM connections if they exist
            const sidecars = [`${dbPath}-wal`, `${dbPath}-shm`];
            for (const sidecar of sidecars) {
                if (DatabaseConnectionManager.hasConnection(sidecar)) {
                    await DatabaseConnectionManager.forceCloseConnection(sidecar);
                }
            }
            console.log('✓ All connections closed');
        }
        catch (error) {
            console.warn('⚠️ Error closing connections:', error);
        }
    }
}
|
|
256
|
+
//# sourceMappingURL=knowledge-base-manager.js.map
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import '../dom-polyfills.js';
|
|
9
9
|
import type { UniversalEmbedder } from './universal-embedder.js';
|
|
10
10
|
import type { RerankFunction } from './interfaces.js';
|
|
11
|
+
import type { ResponseGenerator } from './response-generator.js';
|
|
11
12
|
/**
|
|
12
13
|
* Lazy loader for embedder implementations
|
|
13
14
|
* Only loads the specific embedder type when needed
|
|
@@ -42,6 +43,42 @@ export declare class LazyEmbedderLoader {
|
|
|
42
43
|
multimodalEmbedders: number;
|
|
43
44
|
};
|
|
44
45
|
}
|
|
46
|
+
/**
 * Lazy loader for response generator implementations.
 * Only loads the specific generator type when needed.
 *
 * All members are static; loaded generators are tracked in a shared cache.
 *
 * @experimental This feature is experimental and may change in future versions.
 */
|
|
52
|
+
export declare class LazyGeneratorLoader {
|
|
53
|
+
private static cache;
|
|
54
|
+
/**
 * Lazily load and create an instruct generator (SmolLM2-Instruct).
 * Only imports the module when generation is actually requested.
 *
 * @param modelName - Name of the model to load; also used as part of the cache key
 * @param options - Options forwarded to the generator constructor
 * @returns Promise resolving to the loaded generator
 */
|
|
58
|
+
static loadInstructGenerator(modelName: string, options?: any): Promise<ResponseGenerator>;
|
|
59
|
+
/**
 * Lazily load and create a causal LM generator (DistilGPT2).
 * Only imports the module when generation is actually requested.
 *
 * @param modelName - Name of the model to load; also used as part of the cache key
 * @param options - Options forwarded to the generator constructor
 * @returns Promise resolving to the loaded generator
 */
|
|
63
|
+
static loadCausalLMGenerator(modelName: string, options?: any): Promise<ResponseGenerator>;
|
|
64
|
+
/**
 * Check if a generator is already loaded in cache.
 *
 * @param modelName - Model name the generator was loaded with
 * @param modelType - Generator kind the model was loaded as
 * @returns true when the cache lists an entry for this type/name pair
 */
|
|
67
|
+
static isGeneratorLoaded(modelName: string, modelType: 'instruct' | 'causal-lm'): boolean;
|
|
68
|
+
/**
 * Remove a generator from the cache (called when generator is cleaned up).
 *
 * @param modelName - Model name the generator was loaded with
 * @param modelType - Generator kind the model was loaded as
 */
|
|
71
|
+
static removeGeneratorFromCache(modelName: string, modelType: 'instruct' | 'causal-lm'): void;
|
|
72
|
+
/**
 * Get statistics about loaded generators.
 *
 * @returns The cache keys of loaded generators, their total, and the counts
 *   per generator kind
 */
|
|
75
|
+
static getLoadingStats(): {
|
|
76
|
+
loadedGenerators: string[];
|
|
77
|
+
totalLoaded: number;
|
|
78
|
+
instructGenerators: number;
|
|
79
|
+
causalLMGenerators: number;
|
|
80
|
+
};
|
|
81
|
+
}
|
|
45
82
|
/**
|
|
46
83
|
* Lazy loader for reranking implementations
|
|
47
84
|
* Only loads the specific reranker type when needed
|
|
@@ -107,6 +144,11 @@ export declare class LazyMultimodalLoader {
|
|
|
107
144
|
* Provides a single entry point for dependency management
|
|
108
145
|
*/
|
|
109
146
|
export declare class LazyDependencyManager {
|
|
147
|
+
/**
 * Load response generator based on model type with lazy loading.
 *
 * @param modelName - Name of the generator model to load
 * @param modelType - Which generator kind to load: 'instruct' or 'causal-lm'
 * @param options - Options forwarded to the selected loader
 * @returns Promise resolving to the loaded generator
 * @experimental This feature is experimental and may change in future versions.
 */
|
|
151
|
+
static loadGenerator(modelName: string, modelType: 'instruct' | 'causal-lm', options?: any): Promise<ResponseGenerator>;
|
|
110
152
|
/**
|
|
111
153
|
* Load embedder based on model type with lazy loading
|
|
112
154
|
*/
|
|
@@ -121,6 +163,7 @@ export declare class LazyDependencyManager {
|
|
|
121
163
|
static getLoadingStatistics(): {
|
|
122
164
|
embedders: ReturnType<typeof LazyEmbedderLoader.getLoadingStats>;
|
|
123
165
|
rerankers: ReturnType<typeof LazyRerankerLoader.getLoadingStats>;
|
|
166
|
+
generators: ReturnType<typeof LazyGeneratorLoader.getLoadingStats>;
|
|
124
167
|
multimodal: ReturnType<typeof LazyMultimodalLoader.getMultimodalLoadingStatus>;
|
|
125
168
|
totalModulesLoaded: number;
|
|
126
169
|
memoryImpact: 'low' | 'medium' | 'high';
|
|
@@ -149,6 +149,99 @@ export class LazyEmbedderLoader {
|
|
|
149
149
|
}
|
|
150
150
|
}
|
|
151
151
|
// =============================================================================
|
|
152
|
+
// LAZY GENERATOR LOADING
|
|
153
|
+
// =============================================================================
|
|
154
|
+
/**
 * Lazy loader for response generator implementations
 * Only loads the specific generator type when needed
 *
 * Generators are stored in the shared LazyLoadingCache under keys of the form
 * `generator:<modelType>:<modelName>`.
 *
 * @experimental This feature is experimental and may change in future versions.
 */
export class LazyGeneratorLoader {
    static cache = LazyLoadingCache.getInstance();
    /**
     * Lazily load and create an instruct generator (SmolLM2-Instruct)
     * Only imports the module when generation is actually requested
     *
     * @param modelName - Model to load; part of the cache key
     * @param options - Options passed to the InstructGenerator constructor
     * @returns Promise resolving to the generator after `loadModel()` has completed
     * @throws A model error (from `createError.model`) wrapping any import/load failure
     */
    static async loadInstructGenerator(modelName, options = {}) {
        const cacheKey = `generator:instruct:${modelName}`;
        return this.cache.getOrLoad(cacheKey, async () => {
            try {
                console.log(`🔄 [EXPERIMENTAL] Lazy loading instruct generator: ${modelName}`);
                // Dynamic import - only loaded when generation is requested
                const { InstructGenerator } = await import('../text/generators/instruct-generator.js');
                const generator = new InstructGenerator(modelName, options);
                await generator.loadModel();
                console.log(`✅ Instruct generator loaded: ${modelName}`);
                return generator;
            }
            catch (error) {
                // Report through the shared error handler, then surface the wrapped error
                const enhancedError = createError.model(`Failed to lazy load instruct generator '${modelName}': ${error instanceof Error ? error.message : 'Unknown error'}`);
                handleError(enhancedError, 'LazyGeneratorLoader', {
                    severity: ErrorSeverity.ERROR,
                    category: ErrorCategory.MODEL
                });
                throw enhancedError;
            }
        });
    }
    /**
     * Lazily load and create a causal LM generator (DistilGPT2)
     * Only imports the module when generation is actually requested
     *
     * @param modelName - Model to load; part of the cache key
     * @param options - Options passed to the CausalLMGenerator constructor
     * @returns Promise resolving to the generator after `loadModel()` has completed
     * @throws A model error (from `createError.model`) wrapping any import/load failure
     */
    static async loadCausalLMGenerator(modelName, options = {}) {
        const cacheKey = `generator:causal-lm:${modelName}`;
        return this.cache.getOrLoad(cacheKey, async () => {
            try {
                console.log(`🔄 [EXPERIMENTAL] Lazy loading causal LM generator: ${modelName}`);
                // Dynamic import - only loaded when generation is requested
                const { CausalLMGenerator } = await import('../text/generators/causal-lm-generator.js');
                const generator = new CausalLMGenerator(modelName, options);
                await generator.loadModel();
                console.log(`✅ Causal LM generator loaded: ${modelName}`);
                return generator;
            }
            catch (error) {
                // Report through the shared error handler, then surface the wrapped error
                const enhancedError = createError.model(`Failed to lazy load causal LM generator '${modelName}': ${error instanceof Error ? error.message : 'Unknown error'}`);
                handleError(enhancedError, 'LazyGeneratorLoader', {
                    severity: ErrorSeverity.ERROR,
                    category: ErrorCategory.MODEL
                });
                throw enhancedError;
            }
        });
    }
    /**
     * Check if a generator is already loaded in cache
     *
     * @param modelName - Model name the generator was loaded with
     * @param modelType - 'instruct' or 'causal-lm'
     * @returns true when the cache's loaded-module list contains the matching key
     */
    static isGeneratorLoaded(modelName, modelType) {
        const cacheKey = `generator:${modelType}:${modelName}`;
        return this.cache.getLoadedModules().includes(cacheKey);
    }
    /**
     * Remove a generator from the cache (called when generator is cleaned up)
     *
     * @param modelName - Model name the generator was loaded with
     * @param modelType - 'instruct' or 'causal-lm'
     */
    static removeGeneratorFromCache(modelName, modelType) {
        const cacheKey = `generator:${modelType}:${modelName}`;
        this.cache.remove(cacheKey);
        console.log(`🧹 Removed generator from cache: ${cacheKey}`);
    }
    /**
     * Get statistics about loaded generators
     *
     * Generators are classified by the exact key prefix used when they are cached,
     * so a model name that happens to contain ':instruct:' or ':causal-lm:' is not
     * counted under the wrong kind.
     *
     * @returns Loaded generator cache keys, their total, and per-kind counts
     */
    static getLoadingStats() {
        const loadedModules = this.cache.getLoadedModules().filter(key => key.startsWith('generator:'));
        const countWithPrefix = (prefix) => loadedModules.filter(key => key.startsWith(prefix)).length;
        return {
            loadedGenerators: loadedModules,
            totalLoaded: loadedModules.length,
            instructGenerators: countWithPrefix('generator:instruct:'),
            causalLMGenerators: countWithPrefix('generator:causal-lm:')
        };
    }
}
|
|
244
|
+
// =============================================================================
|
|
152
245
|
// LAZY RERANKER LOADING
|
|
153
246
|
// =============================================================================
|
|
154
247
|
/**
|
|
@@ -332,6 +425,20 @@ export class LazyMultimodalLoader {
|
|
|
332
425
|
* Provides a single entry point for dependency management
|
|
333
426
|
*/
|
|
334
427
|
export class LazyDependencyManager {
|
|
428
|
+
/**
|
|
429
|
+
* Load response generator based on model type with lazy loading
|
|
430
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
431
|
+
*/
|
|
432
|
+
static async loadGenerator(modelName, modelType, options = {}) {
|
|
433
|
+
switch (modelType) {
|
|
434
|
+
case 'instruct':
|
|
435
|
+
return LazyGeneratorLoader.loadInstructGenerator(modelName, options);
|
|
436
|
+
case 'causal-lm':
|
|
437
|
+
return LazyGeneratorLoader.loadCausalLMGenerator(modelName, options);
|
|
438
|
+
default:
|
|
439
|
+
throw createError.validation(`Unsupported generator model type for lazy loading: ${modelType}`);
|
|
440
|
+
}
|
|
441
|
+
}
|
|
335
442
|
/**
|
|
336
443
|
* Load embedder based on model type with lazy loading
|
|
337
444
|
*/
|
|
@@ -367,19 +474,21 @@ export class LazyDependencyManager {
|
|
|
367
474
|
static getLoadingStatistics() {
|
|
368
475
|
const embedderStats = LazyEmbedderLoader.getLoadingStats();
|
|
369
476
|
const rerankerStats = LazyRerankerLoader.getLoadingStats();
|
|
477
|
+
const generatorStats = LazyGeneratorLoader.getLoadingStats();
|
|
370
478
|
const multimodalStats = LazyMultimodalLoader.getMultimodalLoadingStatus();
|
|
371
|
-
const totalModules = embedderStats.totalLoaded + rerankerStats.totalLoaded + multimodalStats.loadedProcessors.length;
|
|
479
|
+
const totalModules = embedderStats.totalLoaded + rerankerStats.totalLoaded + generatorStats.totalLoaded + multimodalStats.loadedProcessors.length;
|
|
372
480
|
// Estimate memory impact based on loaded modules
|
|
373
481
|
let memoryImpact = 'low';
|
|
374
482
|
if (embedderStats.multimodalEmbedders > 0 || multimodalStats.imageToTextLoaded) {
|
|
375
483
|
memoryImpact = 'high';
|
|
376
484
|
}
|
|
377
|
-
else if (totalModules > 2) {
|
|
485
|
+
else if (totalModules > 2 || generatorStats.totalLoaded > 0) {
|
|
378
486
|
memoryImpact = 'medium';
|
|
379
487
|
}
|
|
380
488
|
return {
|
|
381
489
|
embedders: embedderStats,
|
|
382
490
|
rerankers: rerankerStats,
|
|
491
|
+
generators: generatorStats,
|
|
383
492
|
multimodal: multimodalStats,
|
|
384
493
|
totalModulesLoaded: totalModules,
|
|
385
494
|
memoryImpact
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Prompt Templates for RAG Response Generation
|
|
3
|
+
*
|
|
4
|
+
* Provides prompt engineering utilities for different generator model types.
|
|
5
|
+
* Handles context formatting, token budget management, and system prompts.
|
|
6
|
+
*
|
|
7
|
+
* PROMPT STRATEGIES:
|
|
8
|
+
* - Instruct models: Use chat template with system/user/assistant roles
|
|
9
|
+
* - Causal LM models: Use simple document + question format
|
|
10
|
+
*
|
|
11
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
12
|
+
*/
|
|
13
|
+
import type { SearchResult } from './types.js';
|
|
14
|
+
import type { GeneratorModelType } from './response-generator.js';
|
|
15
|
+
/**
 * Default system prompt for instruct models.
 * Emphasizes grounded responses using only provided context: the text lists five
 * numbered rules, including a fixed sentence to use when the answer is not found
 * in the context documents.
 */
|
|
19
|
+
export declare const DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n3. Do not make up information or use external knowledge\n4. Be concise and direct in your response\n5. If the context is incomplete or unclear, acknowledge this limitation";
|
|
20
|
+
/**
 * Default system prompt for RAG with source attribution.
 * Same grounding rules as the plain default prompt, with an added rule asking the
 * model to mention which document the information comes from when possible.
 */
|
|
23
|
+
export declare const DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. When possible, mention which document the information comes from\n3. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n4. Do not make up information or use external knowledge\n5. Be concise and direct in your response";
|
|
24
|
+
/**
 * SmolLM2 chat template format.
 * Uses <|im_start|> and <|im_end|> tokens.
 *
 * Each field is a string marker for the start or end of a role segment, plus an
 * end-of-text marker. The concrete values live in the implementation module and
 * are not visible in this declaration.
 */
|
|
28
|
+
export declare const SMOLLM2_CHAT_TEMPLATE: {
|
|
29
|
+
systemStart: string;
|
|
30
|
+
systemEnd: string;
|
|
31
|
+
userStart: string;
|
|
32
|
+
userEnd: string;
|
|
33
|
+
assistantStart: string;
|
|
34
|
+
assistantEnd: string;
|
|
35
|
+
endOfText: string;
|
|
36
|
+
};
|
|
37
|
+
/**
 * Options for formatting context chunks.
 * Only `maxContextTokens` is required; the defaults of the optional fields are
 * not visible in this declaration.
 */
|
|
40
|
+
export interface ContextFormattingOptions {
|
|
41
|
+
/** Maximum tokens available for context */
|
|
42
|
+
maxContextTokens: number;
|
|
43
|
+
/** Include document titles/sources */
|
|
44
|
+
includeDocumentInfo?: boolean;
|
|
45
|
+
/** Include relevance scores */
|
|
46
|
+
includeScores?: boolean;
|
|
47
|
+
/** Separator between chunks */
|
|
48
|
+
chunkSeparator?: string;
|
|
49
|
+
/** Ratio used for token estimation (a number, not a function); presumably characters per token - confirm against the implementation */
|
|
50
|
+
tokenEstimationRatio?: number;
|
|
51
|
+
}
|
|
52
|
+
/**
 * Result of context formatting, as returned by `formatContextChunks` and embedded
 * in `BuiltPrompt.contextInfo`.
 */
|
|
55
|
+
export interface FormattedContext {
|
|
56
|
+
/** Formatted context string */
|
|
57
|
+
text: string;
|
|
58
|
+
/** Estimated token count of the formatted context */
|
|
59
|
+
estimatedTokens: number;
|
|
60
|
+
/** Number of chunks included */
|
|
61
|
+
chunksIncluded: number;
|
|
62
|
+
/** Total chunks available */
|
|
63
|
+
totalChunks: number;
|
|
64
|
+
/** Whether context was truncated */
|
|
65
|
+
truncated: boolean;
|
|
66
|
+
}
|
|
67
|
+
/**
 * Format search result chunks into a context string for the prompt.
 *
 * @param chunks - Search result chunks to format
 * @param options - Formatting options (token budget, optional document info,
 *   scores, separator and estimation ratio)
 * @returns Formatted context with metadata (text, estimated tokens, chunk counts,
 *   truncation flag)
 */
|
|
74
|
+
export declare function formatContextChunks(chunks: SearchResult[], options: ContextFormattingOptions): FormattedContext;
|
|
75
|
+
/**
 * Options for building the complete prompt.
 * `systemPrompt` and `includeSourceAttribution` are optional; all other fields
 * are required.
 */
|
|
78
|
+
export interface PromptBuildOptions {
|
|
79
|
+
/** User's query */
|
|
80
|
+
query: string;
|
|
81
|
+
/** Search result chunks */
|
|
82
|
+
chunks: SearchResult[];
|
|
83
|
+
/** Generator model type */
|
|
84
|
+
modelType: GeneratorModelType;
|
|
85
|
+
/** Custom system prompt (optional) */
|
|
86
|
+
systemPrompt?: string;
|
|
87
|
+
/** Maximum context window size, in tokens */
|
|
88
|
+
maxContextLength: number;
|
|
89
|
+
/** Tokens reserved for output */
|
|
90
|
+
reservedOutputTokens: number;
|
|
91
|
+
/** Include source attribution hint */
|
|
92
|
+
includeSourceAttribution?: boolean;
|
|
93
|
+
}
|
|
94
|
+
/**
 * Result of prompt building, as returned by `buildPrompt`.
 */
|
|
97
|
+
export interface BuiltPrompt {
|
|
98
|
+
/** Complete prompt string */
|
|
99
|
+
prompt: string;
|
|
100
|
+
/** Estimated total tokens */
|
|
101
|
+
estimatedTokens: number;
|
|
102
|
+
/** Context metadata (what was included and whether it was truncated) */
|
|
103
|
+
contextInfo: FormattedContext;
|
|
104
|
+
/** System prompt used (if any) */
|
|
105
|
+
systemPromptUsed?: string;
|
|
106
|
+
}
|
|
107
|
+
/**
 * Build a complete prompt for the generator model.
 *
 * @param options - Prompt building options (query, chunks, model type, token limits,
 *   optional system prompt and attribution flag)
 * @returns Built prompt with metadata (prompt text, estimated tokens, context info,
 *   and the system prompt used, if any)
 */
|
|
113
|
+
export declare function buildPrompt(options: PromptBuildOptions): BuiltPrompt;
|
|
114
|
+
/**
 * Estimate token count for a string.
 * Uses a simple character-based heuristic (~4 chars per token for English), so the
 * result is an approximation rather than an exact tokenizer count.
 *
 * @param text - Text to estimate tokens for
 * @returns Estimated token count
 */
|
|
121
|
+
export declare function estimateTokenCount(text: string): number;
|
|
122
|
+
/**
 * Calculate available context budget.
 *
 * @param maxContextLength - Maximum context window size
 * @param reservedOutputTokens - Tokens reserved for generation
 * @param promptOverhead - Tokens used by prompt formatting (optional; the default
 *   is not visible in this declaration)
 * @returns Available tokens for context chunks
 */
|
|
130
|
+
export declare function calculateContextBudget(maxContextLength: number, reservedOutputTokens: number, promptOverhead?: number): number;
|
|
131
|
+
/**
 * Get default stop sequences for a model type.
 *
 * @param modelType - Generator model type
 * @returns Array of stop sequence strings for that model type
 */
|
|
137
|
+
export declare function getDefaultStopSequences(modelType: GeneratorModelType): string[];
|
|
138
|
+
//# sourceMappingURL=prompt-templates.d.ts.map
|