rag-lite-ts 2.3.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/cli/search.js +77 -2
- package/dist/cjs/cli.js +28 -1
- package/dist/cjs/core/abstract-generator.d.ts +97 -0
- package/dist/cjs/core/abstract-generator.js +222 -0
- package/dist/cjs/core/binary-index-format.js +47 -7
- package/dist/cjs/core/generator-registry.d.ts +114 -0
- package/dist/cjs/core/generator-registry.js +280 -0
- package/dist/cjs/core/index.d.ts +4 -0
- package/dist/cjs/core/index.js +11 -0
- package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/cjs/core/lazy-dependency-loader.js +111 -2
- package/dist/cjs/core/prompt-templates.d.ts +138 -0
- package/dist/cjs/core/prompt-templates.js +225 -0
- package/dist/cjs/core/response-generator.d.ts +132 -0
- package/dist/cjs/core/response-generator.js +69 -0
- package/dist/cjs/core/search.d.ts +72 -1
- package/dist/cjs/core/search.js +79 -6
- package/dist/cjs/core/types.d.ts +1 -0
- package/dist/cjs/core/vector-index-worker.js +10 -0
- package/dist/cjs/core/vector-index.js +69 -19
- package/dist/cjs/factories/generator-factory.d.ts +88 -0
- package/dist/cjs/factories/generator-factory.js +151 -0
- package/dist/cjs/factories/index.d.ts +1 -0
- package/dist/cjs/factories/index.js +5 -0
- package/dist/cjs/index.d.ts +9 -0
- package/dist/cjs/index.js +16 -0
- package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
- package/dist/cjs/text/generators/index.d.ts +10 -0
- package/dist/cjs/text/generators/index.js +10 -0
- package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
- package/dist/cjs/text/generators/instruct-generator.js +192 -0
- package/dist/esm/cli/search.js +77 -2
- package/dist/esm/cli.js +28 -1
- package/dist/esm/core/abstract-generator.d.ts +97 -0
- package/dist/esm/core/abstract-generator.js +222 -0
- package/dist/esm/core/binary-index-format.js +47 -7
- package/dist/esm/core/generator-registry.d.ts +114 -0
- package/dist/esm/core/generator-registry.js +280 -0
- package/dist/esm/core/index.d.ts +4 -0
- package/dist/esm/core/index.js +11 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/esm/core/lazy-dependency-loader.js +111 -2
- package/dist/esm/core/prompt-templates.d.ts +138 -0
- package/dist/esm/core/prompt-templates.js +225 -0
- package/dist/esm/core/response-generator.d.ts +132 -0
- package/dist/esm/core/response-generator.js +69 -0
- package/dist/esm/core/search.d.ts +72 -1
- package/dist/esm/core/search.js +79 -6
- package/dist/esm/core/types.d.ts +1 -0
- package/dist/esm/core/vector-index-worker.js +10 -0
- package/dist/esm/core/vector-index.js +69 -19
- package/dist/esm/factories/generator-factory.d.ts +88 -0
- package/dist/esm/factories/generator-factory.js +151 -0
- package/dist/esm/factories/index.d.ts +1 -0
- package/dist/esm/factories/index.js +5 -0
- package/dist/esm/index.d.ts +9 -0
- package/dist/esm/index.js +16 -0
- package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/esm/text/generators/causal-lm-generator.js +197 -0
- package/dist/esm/text/generators/index.d.ts +10 -0
- package/dist/esm/text/generators/index.js +10 -0
- package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
- package/dist/esm/text/generators/instruct-generator.js +192 -0
- package/package.json +1 -1
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Generator Registry for RAG Response Generation
|
|
3
|
+
*
|
|
4
|
+
* Centralized registry of supported generator models with validation and
|
|
5
|
+
* compatibility checking. Follows the same patterns as model-registry.ts.
|
|
6
|
+
*
|
|
7
|
+
* SUPPORTED MODELS:
|
|
8
|
+
* - HuggingFaceTB/SmolLM2-135M-Instruct: Balanced instruct model (DEFAULT, 3 chunks)
|
|
9
|
+
* - HuggingFaceTB/SmolLM2-360M-Instruct: Higher quality instruct model (5 chunks)
|
|
10
|
+
*
|
|
11
|
+
* PREREQUISITES:
|
|
12
|
+
* - Reranking must be enabled for response generation to ensure quality context
|
|
13
|
+
*
|
|
14
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
15
|
+
*/
|
|
16
|
+
// =============================================================================
|
|
17
|
+
// GENERATOR REGISTRY
|
|
18
|
+
// =============================================================================
|
|
19
|
+
/**
 * Registry of supported generator models with their metadata and capabilities.
 * Keys are Hugging Face model identifiers; values describe the model type,
 * generation capabilities, and runtime requirements that GeneratorRegistry
 * uses for validation and compatibility checks.
 */
export const SUPPORTED_GENERATORS = {
    // SmolLM2-135M-Instruct - Balanced instruction-tuned model (RECOMMENDED DEFAULT)
    'HuggingFaceTB/SmolLM2-135M-Instruct': {
        name: 'HuggingFaceTB/SmolLM2-135M-Instruct',
        type: 'instruct',
        version: '1.0.0',
        description: 'Balanced instruction-tuned model with good quality and speed (uses top 3 chunks)',
        capabilities: {
            supportsStreaming: true,
            supportsSystemPrompt: true, // Instruct models support system prompts
            instructionTuned: true,
            maxContextLength: 2048, // tokens
            defaultMaxOutputTokens: 512,
            recommendedTemperature: 0.1, // low temperature keeps answers grounded in context
            defaultMaxChunksForContext: 3 // Use top 3 reranked chunks for context
        },
        requirements: {
            transformersJsVersion: '>=3.0.0',
            minimumMemory: 768, // MB
            requiredFeatures: ['text-generation'],
            platformSupport: ['node', 'browser']
        },
        isDefault: true // Recommended default model
    },
    // SmolLM2-360M-Instruct - Higher quality instruction-tuned model
    'HuggingFaceTB/SmolLM2-360M-Instruct': {
        name: 'HuggingFaceTB/SmolLM2-360M-Instruct',
        type: 'instruct',
        version: '1.0.0',
        description: 'Higher quality instruction-tuned model, slower but more accurate (uses top 5 chunks)',
        capabilities: {
            supportsStreaming: true,
            supportsSystemPrompt: true,
            instructionTuned: true,
            maxContextLength: 2048, // tokens
            defaultMaxOutputTokens: 512,
            recommendedTemperature: 0.1,
            defaultMaxChunksForContext: 5 // Use top 5 reranked chunks for context
        },
        requirements: {
            transformersJsVersion: '>=3.0.0',
            minimumMemory: 1024, // MB — larger model needs more headroom
            requiredFeatures: ['text-generation'],
            platformSupport: ['node', 'browser']
        },
        isDefault: false
    }
};
|
|
70
|
+
// =============================================================================
// DEFAULT MODEL
// =============================================================================
/** Default generator model name (must be a key of SUPPORTED_GENERATORS). */
export const DEFAULT_GENERATOR_MODEL = 'HuggingFaceTB/SmolLM2-135M-Instruct';
|
|
75
|
+
// =============================================================================
|
|
76
|
+
// GENERATOR REGISTRY CLASS
|
|
77
|
+
// =============================================================================
|
|
78
|
+
/**
 * Generator registry providing validation and model-information services.
 * All members are static; the registry is backed by the module-level
 * SUPPORTED_GENERATORS table.
 */
export class GeneratorRegistry {
    /**
     * Looks up the registry entry for a generator model.
     * @param modelName - Name of the generator model
     * @returns Generator model information, or null when the model is unknown
     */
    static getGeneratorInfo(modelName) {
        const entry = SUPPORTED_GENERATORS[modelName];
        return entry ? entry : null;
    }
    /**
     * Validates a generator model name and reports compatibility information.
     * @param modelName - Name of the model to validate
     * @returns Validation result with errors, warnings, and suggestions
     */
    static validateGenerator(modelName) {
        const info = this.getGeneratorInfo(modelName);
        if (!info) {
            // Unknown model: offer close matches, or the full list as a fallback.
            const similar = this.getSimilarGenerators(modelName);
            const hints = similar.length > 0
                ? [`Did you mean: ${similar.join(', ')}?`]
                : [`Available generators: ${this.getSupportedGenerators().join(', ')}`];
            return {
                isValid: false,
                errors: [`Generator model '${modelName}' is not supported`],
                warnings: [],
                suggestions: hints
            };
        }
        const warnings = [];
        // Flag models that need more memory than the lightest supported model.
        if (info.requirements.minimumMemory > 768) {
            warnings.push(`Model requires ${info.requirements.minimumMemory}MB memory`);
        }
        return { isValid: true, errors: [], warnings, suggestions: [] };
    }
    /**
     * Lists all supported generator models.
     * @param modelType - Optional filter by model type
     * @returns Array of supported generator model names
     */
    static getSupportedGenerators(modelType) {
        const names = Object.keys(SUPPORTED_GENERATORS);
        if (!modelType) {
            return names;
        }
        return names.filter(name => SUPPORTED_GENERATORS[name].type === modelType);
    }
    /**
     * Returns the default generator model name.
     */
    static getDefaultGenerator() {
        return DEFAULT_GENERATOR_MODEL;
    }
    /**
     * Lists generators of a specific type.
     * @param type - Generator type ('causal-lm' or 'instruct')
     * @returns Array of model names matching the type
     */
    static getGeneratorsByType(type) {
        const matches = [];
        for (const name of Object.keys(SUPPORTED_GENERATORS)) {
            if (SUPPORTED_GENERATORS[name].type === type) {
                matches.push(name);
            }
        }
        return matches;
    }
    /**
     * Checks whether a generator model supports a specific capability.
     * Boolean capabilities return their value; non-boolean capabilities are
     * considered "supported" when the key is present (not undefined).
     * @param modelName - Name of the model
     * @param capability - Capability to check
     * @returns True if the model supports the capability
     */
    static supportsCapability(modelName, capability) {
        const info = this.getGeneratorInfo(modelName);
        if (!info) {
            return false;
        }
        const value = info.capabilities[capability];
        if (typeof value === 'boolean') {
            return value;
        }
        return value !== undefined;
    }
    /**
     * Finds up to three registered generators that share a keyword with the
     * given (unsupported) model name, for "did you mean" suggestions.
     * @private
     */
    static getSimilarGenerators(modelName) {
        const needle = modelName.toLowerCase();
        // Keyword overlap is a crude-but-cheap similarity heuristic.
        const keywords = ['gpt', 'smol', 'lm', 'instruct', 'distil'];
        const active = keywords.filter(keyword => needle.includes(keyword));
        const matches = [];
        for (const candidate of Object.keys(SUPPORTED_GENERATORS)) {
            const lower = candidate.toLowerCase();
            if (active.some(keyword => lower.includes(keyword))) {
                matches.push(candidate);
                if (matches.length === 3) {
                    break; // cap suggestions at three
                }
            }
        }
        return matches;
    }
    /**
     * Validates system compatibility (memory, platform) for a generator model.
     * @param modelName - Name of the model
     * @param systemCapabilities - System capabilities to check against
     * @returns Validation result with compatibility information
     */
    static validateSystemCompatibility(modelName, systemCapabilities) {
        const info = this.getGeneratorInfo(modelName);
        if (!info) {
            return {
                isValid: false,
                errors: [`Generator model '${modelName}' is not supported`],
                warnings: [],
                suggestions: []
            };
        }
        const errors = [];
        const warnings = [];
        const suggestions = [];
        const { availableMemory, platform } = systemCapabilities;
        // Memory check — only applies when the caller reported available memory.
        if (availableMemory !== undefined && availableMemory < info.requirements.minimumMemory) {
            errors.push(`Insufficient memory: ${availableMemory}MB available, ${info.requirements.minimumMemory}MB required`);
            // Suggest any registered models that fit within available memory.
            const lighter = this.getSupportedGenerators().filter(name => {
                const candidate = this.getGeneratorInfo(name);
                return candidate &&
                    candidate.requirements.minimumMemory <= availableMemory;
            });
            if (lighter.length > 0) {
                suggestions.push(`Consider lighter models: ${lighter.join(', ')}`);
            }
        }
        // Platform check — only applies when the caller reported a platform.
        if (platform && !info.requirements.platformSupport.includes(platform)) {
            errors.push(`Platform '${platform}' not supported. Supported platforms: ${info.requirements.platformSupport.join(', ')}`);
        }
        return {
            isValid: errors.length === 0,
            errors,
            warnings,
            suggestions
        };
    }
}
|
|
226
|
+
// =============================================================================
|
|
227
|
+
// UTILITY FUNCTIONS
|
|
228
|
+
// =============================================================================
|
|
229
|
+
/**
 * Resolves the generator type for a model name.
 * @param modelName - Name of the model
 * @returns Generator type, or null when the model is not in the registry
 */
export function getGeneratorType(modelName) {
    const info = GeneratorRegistry.getGeneratorInfo(modelName);
    if (info === null) {
        return null;
    }
    return info.type;
}
|
|
238
|
+
/**
 * Reports whether a model is instruction-tuned.
 * @param modelName - Name of the model
 * @returns True if the model is instruction-tuned; false for unknown models
 */
export function isInstructionTunedModel(modelName) {
    const info = GeneratorRegistry.getGeneratorInfo(modelName);
    if (info === null) {
        return false;
    }
    return info.capabilities.instructionTuned;
}
|
|
247
|
+
/**
 * Reads the maximum context length (in tokens) for a generator model.
 * @param modelName - Name of the model
 * @returns Maximum context length, or null when the model is not supported
 */
export function getMaxContextLength(modelName) {
    const info = GeneratorRegistry.getGeneratorInfo(modelName);
    if (info === null) {
        return null;
    }
    return info.capabilities.maxContextLength;
}
|
|
256
|
+
/**
 * Builds the recommended generation settings for a model from its registry
 * entry (temperature, output-token budget, context chunk count).
 * @param modelName - Name of the model
 * @returns Recommended settings, or null when the model is not supported
 */
export function getRecommendedSettings(modelName) {
    const info = GeneratorRegistry.getGeneratorInfo(modelName);
    if (info === null) {
        return null;
    }
    const { recommendedTemperature, defaultMaxOutputTokens, defaultMaxChunksForContext } = info.capabilities;
    return {
        temperature: recommendedTemperature,
        maxTokens: defaultMaxOutputTokens,
        maxChunksForContext: defaultMaxChunksForContext
    };
}
|
|
271
|
+
/**
 * Reads the default maximum number of context chunks for a generator model.
 * @param modelName - Name of the model
 * @returns Default max chunks for context, or null when the model is not supported
 */
export function getDefaultMaxChunksForContext(modelName) {
    const info = GeneratorRegistry.getGeneratorInfo(modelName);
    if (info === null) {
        return null;
    }
    return info.capabilities.defaultMaxChunksForContext;
}
|
|
280
|
+
//# sourceMappingURL=generator-registry.js.map
|
package/dist/esm/core/index.d.ts
CHANGED
|
@@ -56,4 +56,8 @@ export * from './path-manager.js';
|
|
|
56
56
|
export { ContentManager, type MemoryContentMetadata, type ContentIngestionResult, type ContentManagerConfig } from './content-manager.js';
|
|
57
57
|
export { ContentResolver, type ContentRequest, type ContentResult } from './content-resolver.js';
|
|
58
58
|
export * from './error-handler.js';
|
|
59
|
+
export { type ResponseGenerator, type GeneratorModelType, type GenerationRequest, type GenerationResult, type GeneratorCapabilities, type GeneratorRequirements, type GeneratorModelInfo, type GeneratorValidationResult, type GeneratorCreationOptions, type GenerateFunction, type CreateGeneratorFunction, GeneratorValidationError, GenerationError, ContextWindowError, supportsStreaming, isInstructModel, createGenerateFunction } from './response-generator.js';
|
|
60
|
+
export { SUPPORTED_GENERATORS, DEFAULT_GENERATOR_MODEL, GeneratorRegistry, getGeneratorType, isInstructionTunedModel, getMaxContextLength, getRecommendedSettings, getDefaultMaxChunksForContext } from './generator-registry.js';
|
|
61
|
+
export { DEFAULT_SYSTEM_PROMPT, DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION, SMOLLM2_CHAT_TEMPLATE, formatContextChunks, buildPrompt, estimateTokenCount, calculateContextBudget, getDefaultStopSequences, type ContextFormattingOptions, type FormattedContext, type PromptBuildOptions, type BuiltPrompt } from './prompt-templates.js';
|
|
62
|
+
export { BaseResponseGenerator, createGeneratorOptions, type GeneratorOptions } from './abstract-generator.js';
|
|
59
63
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/esm/core/index.js
CHANGED
|
@@ -66,4 +66,15 @@ export { ContentManager } from './content-manager.js';
|
|
|
66
66
|
export { ContentResolver } from './content-resolver.js';
|
|
67
67
|
// Error handling framework - supports implementation-specific error contexts
|
|
68
68
|
export * from './error-handler.js';
|
|
69
|
+
// =============================================================================
|
|
70
|
+
// EXPERIMENTAL: Response Generation
|
|
71
|
+
// =============================================================================
|
|
72
|
+
// Response generation types and interfaces (experimental)
|
|
73
|
+
export { GeneratorValidationError, GenerationError, ContextWindowError, supportsStreaming, isInstructModel, createGenerateFunction } from './response-generator.js';
|
|
74
|
+
// Generator registry (experimental)
|
|
75
|
+
export { SUPPORTED_GENERATORS, DEFAULT_GENERATOR_MODEL, GeneratorRegistry, getGeneratorType, isInstructionTunedModel, getMaxContextLength, getRecommendedSettings, getDefaultMaxChunksForContext } from './generator-registry.js';
|
|
76
|
+
// Prompt templates for generation (experimental)
|
|
77
|
+
export { DEFAULT_SYSTEM_PROMPT, DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION, SMOLLM2_CHAT_TEMPLATE, formatContextChunks, buildPrompt, estimateTokenCount, calculateContextBudget, getDefaultStopSequences } from './prompt-templates.js';
|
|
78
|
+
// Abstract base generator (experimental)
|
|
79
|
+
export { BaseResponseGenerator, createGeneratorOptions } from './abstract-generator.js';
|
|
69
80
|
//# sourceMappingURL=index.js.map
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import '../dom-polyfills.js';
|
|
9
9
|
import type { UniversalEmbedder } from './universal-embedder.js';
|
|
10
10
|
import type { RerankFunction } from './interfaces.js';
|
|
11
|
+
import type { ResponseGenerator } from './response-generator.js';
|
|
11
12
|
/**
|
|
12
13
|
* Lazy loader for embedder implementations
|
|
13
14
|
* Only loads the specific embedder type when needed
|
|
@@ -42,6 +43,42 @@ export declare class LazyEmbedderLoader {
|
|
|
42
43
|
multimodalEmbedders: number;
|
|
43
44
|
};
|
|
44
45
|
}
|
|
46
|
+
/**
|
|
47
|
+
* Lazy loader for response generator implementations
|
|
48
|
+
* Only loads the specific generator type when needed
|
|
49
|
+
*
|
|
50
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
51
|
+
*/
|
|
52
|
+
export declare class LazyGeneratorLoader {
|
|
53
|
+
private static cache;
|
|
54
|
+
/**
|
|
55
|
+
* Lazily load and create an instruct generator (SmolLM2-Instruct)
|
|
56
|
+
* Only imports the module when generation is actually requested
|
|
57
|
+
*/
|
|
58
|
+
static loadInstructGenerator(modelName: string, options?: any): Promise<ResponseGenerator>;
|
|
59
|
+
/**
|
|
60
|
+
* Lazily load and create a causal LM generator (DistilGPT2)
|
|
61
|
+
* Only imports the module when generation is actually requested
|
|
62
|
+
*/
|
|
63
|
+
static loadCausalLMGenerator(modelName: string, options?: any): Promise<ResponseGenerator>;
|
|
64
|
+
/**
|
|
65
|
+
* Check if a generator is already loaded in cache
|
|
66
|
+
*/
|
|
67
|
+
static isGeneratorLoaded(modelName: string, modelType: 'instruct' | 'causal-lm'): boolean;
|
|
68
|
+
/**
|
|
69
|
+
* Remove a generator from the cache (called when generator is cleaned up)
|
|
70
|
+
*/
|
|
71
|
+
static removeGeneratorFromCache(modelName: string, modelType: 'instruct' | 'causal-lm'): void;
|
|
72
|
+
/**
|
|
73
|
+
* Get statistics about loaded generators
|
|
74
|
+
*/
|
|
75
|
+
static getLoadingStats(): {
|
|
76
|
+
loadedGenerators: string[];
|
|
77
|
+
totalLoaded: number;
|
|
78
|
+
instructGenerators: number;
|
|
79
|
+
causalLMGenerators: number;
|
|
80
|
+
};
|
|
81
|
+
}
|
|
45
82
|
/**
|
|
46
83
|
* Lazy loader for reranking implementations
|
|
47
84
|
* Only loads the specific reranker type when needed
|
|
@@ -107,6 +144,11 @@ export declare class LazyMultimodalLoader {
|
|
|
107
144
|
* Provides a single entry point for dependency management
|
|
108
145
|
*/
|
|
109
146
|
export declare class LazyDependencyManager {
|
|
147
|
+
/**
|
|
148
|
+
* Load response generator based on model type with lazy loading
|
|
149
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
150
|
+
*/
|
|
151
|
+
static loadGenerator(modelName: string, modelType: 'instruct' | 'causal-lm', options?: any): Promise<ResponseGenerator>;
|
|
110
152
|
/**
|
|
111
153
|
* Load embedder based on model type with lazy loading
|
|
112
154
|
*/
|
|
@@ -121,6 +163,7 @@ export declare class LazyDependencyManager {
|
|
|
121
163
|
static getLoadingStatistics(): {
|
|
122
164
|
embedders: ReturnType<typeof LazyEmbedderLoader.getLoadingStats>;
|
|
123
165
|
rerankers: ReturnType<typeof LazyRerankerLoader.getLoadingStats>;
|
|
166
|
+
generators: ReturnType<typeof LazyGeneratorLoader.getLoadingStats>;
|
|
124
167
|
multimodal: ReturnType<typeof LazyMultimodalLoader.getMultimodalLoadingStatus>;
|
|
125
168
|
totalModulesLoaded: number;
|
|
126
169
|
memoryImpact: 'low' | 'medium' | 'high';
|
|
@@ -149,6 +149,99 @@ export class LazyEmbedderLoader {
|
|
|
149
149
|
}
|
|
150
150
|
}
|
|
151
151
|
// =============================================================================
|
|
152
|
+
// LAZY GENERATOR LOADING
|
|
153
|
+
// =============================================================================
|
|
154
|
+
/**
 * Lazy loader for response generator implementations.
 * Defers the dynamic import and model download of a generator until
 * generation is actually requested, caching loaded instances by
 * `generator:<type>:<model>` keys.
 *
 * @experimental This feature is experimental and may change in future versions.
 */
export class LazyGeneratorLoader {
    // Shared process-wide cache of loaded modules/instances.
    static cache = LazyLoadingCache.getInstance();
    /**
     * Lazily load and create an instruct generator (SmolLM2-Instruct).
     * The module is only imported on first use; later calls hit the cache.
     */
    static async loadInstructGenerator(modelName, options = {}) {
        const cacheKey = `generator:instruct:${modelName}`;
        return this.cache.getOrLoad(cacheKey, async () => {
            try {
                console.log(`🔄 [EXPERIMENTAL] Lazy loading instruct generator: ${modelName}`);
                // Dynamic import - only loaded when generation is requested
                const { InstructGenerator } = await import('../text/generators/instruct-generator.js');
                const instance = new InstructGenerator(modelName, options);
                await instance.loadModel();
                console.log(`✅ Instruct generator loaded: ${modelName}`);
                return instance;
            }
            catch (error) {
                const reason = error instanceof Error ? error.message : 'Unknown error';
                const enhancedError = createError.model(`Failed to lazy load instruct generator '${modelName}': ${reason}`);
                handleError(enhancedError, 'LazyGeneratorLoader', {
                    severity: ErrorSeverity.ERROR,
                    category: ErrorCategory.MODEL
                });
                throw enhancedError;
            }
        });
    }
    /**
     * Lazily load and create a causal LM generator (e.g. DistilGPT2).
     * The module is only imported on first use; later calls hit the cache.
     */
    static async loadCausalLMGenerator(modelName, options = {}) {
        const cacheKey = `generator:causal-lm:${modelName}`;
        return this.cache.getOrLoad(cacheKey, async () => {
            try {
                console.log(`🔄 [EXPERIMENTAL] Lazy loading causal LM generator: ${modelName}`);
                // Dynamic import - only loaded when generation is requested
                const { CausalLMGenerator } = await import('../text/generators/causal-lm-generator.js');
                const instance = new CausalLMGenerator(modelName, options);
                await instance.loadModel();
                console.log(`✅ Causal LM generator loaded: ${modelName}`);
                return instance;
            }
            catch (error) {
                const reason = error instanceof Error ? error.message : 'Unknown error';
                const enhancedError = createError.model(`Failed to lazy load causal LM generator '${modelName}': ${reason}`);
                handleError(enhancedError, 'LazyGeneratorLoader', {
                    severity: ErrorSeverity.ERROR,
                    category: ErrorCategory.MODEL
                });
                throw enhancedError;
            }
        });
    }
    /**
     * Check whether a generator is already present in the cache.
     */
    static isGeneratorLoaded(modelName, modelType) {
        return this.cache.getLoadedModules().includes(`generator:${modelType}:${modelName}`);
    }
    /**
     * Evict a generator from the cache (called when the generator is cleaned up).
     */
    static removeGeneratorFromCache(modelName, modelType) {
        const cacheKey = `generator:${modelType}:${modelName}`;
        this.cache.remove(cacheKey);
        console.log(`🧹 Removed generator from cache: ${cacheKey}`);
    }
    /**
     * Summarize which generators are currently loaded, broken down by type.
     */
    static getLoadingStats() {
        const generatorKeys = [];
        for (const key of this.cache.getLoadedModules()) {
            if (key.startsWith('generator:')) {
                generatorKeys.push(key);
            }
        }
        let instructGenerators = 0;
        let causalLMGenerators = 0;
        for (const key of generatorKeys) {
            if (key.includes(':instruct:')) {
                instructGenerators += 1;
            }
            if (key.includes(':causal-lm:')) {
                causalLMGenerators += 1;
            }
        }
        return {
            loadedGenerators: generatorKeys,
            totalLoaded: generatorKeys.length,
            instructGenerators,
            causalLMGenerators
        };
    }
}
|
|
244
|
+
// =============================================================================
|
|
152
245
|
// LAZY RERANKER LOADING
|
|
153
246
|
// =============================================================================
|
|
154
247
|
/**
|
|
@@ -332,6 +425,20 @@ export class LazyMultimodalLoader {
|
|
|
332
425
|
* Provides a single entry point for dependency management
|
|
333
426
|
*/
|
|
334
427
|
export class LazyDependencyManager {
|
|
428
|
+
/**
|
|
429
|
+
* Load response generator based on model type with lazy loading
|
|
430
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
431
|
+
*/
|
|
432
|
+
static async loadGenerator(modelName, modelType, options = {}) {
|
|
433
|
+
switch (modelType) {
|
|
434
|
+
case 'instruct':
|
|
435
|
+
return LazyGeneratorLoader.loadInstructGenerator(modelName, options);
|
|
436
|
+
case 'causal-lm':
|
|
437
|
+
return LazyGeneratorLoader.loadCausalLMGenerator(modelName, options);
|
|
438
|
+
default:
|
|
439
|
+
throw createError.validation(`Unsupported generator model type for lazy loading: ${modelType}`);
|
|
440
|
+
}
|
|
441
|
+
}
|
|
335
442
|
/**
|
|
336
443
|
* Load embedder based on model type with lazy loading
|
|
337
444
|
*/
|
|
@@ -367,19 +474,21 @@ export class LazyDependencyManager {
|
|
|
367
474
|
static getLoadingStatistics() {
|
|
368
475
|
const embedderStats = LazyEmbedderLoader.getLoadingStats();
|
|
369
476
|
const rerankerStats = LazyRerankerLoader.getLoadingStats();
|
|
477
|
+
const generatorStats = LazyGeneratorLoader.getLoadingStats();
|
|
370
478
|
const multimodalStats = LazyMultimodalLoader.getMultimodalLoadingStatus();
|
|
371
|
-
const totalModules = embedderStats.totalLoaded + rerankerStats.totalLoaded + multimodalStats.loadedProcessors.length;
|
|
479
|
+
const totalModules = embedderStats.totalLoaded + rerankerStats.totalLoaded + generatorStats.totalLoaded + multimodalStats.loadedProcessors.length;
|
|
372
480
|
// Estimate memory impact based on loaded modules
|
|
373
481
|
let memoryImpact = 'low';
|
|
374
482
|
if (embedderStats.multimodalEmbedders > 0 || multimodalStats.imageToTextLoaded) {
|
|
375
483
|
memoryImpact = 'high';
|
|
376
484
|
}
|
|
377
|
-
else if (totalModules > 2) {
|
|
485
|
+
else if (totalModules > 2 || generatorStats.totalLoaded > 0) {
|
|
378
486
|
memoryImpact = 'medium';
|
|
379
487
|
}
|
|
380
488
|
return {
|
|
381
489
|
embedders: embedderStats,
|
|
382
490
|
rerankers: rerankerStats,
|
|
491
|
+
generators: generatorStats,
|
|
383
492
|
multimodal: multimodalStats,
|
|
384
493
|
totalModulesLoaded: totalModules,
|
|
385
494
|
memoryImpact
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Prompt Templates for RAG Response Generation
|
|
3
|
+
*
|
|
4
|
+
* Provides prompt engineering utilities for different generator model types.
|
|
5
|
+
* Handles context formatting, token budget management, and system prompts.
|
|
6
|
+
*
|
|
7
|
+
* PROMPT STRATEGIES:
|
|
8
|
+
* - Instruct models: Use chat template with system/user/assistant roles
|
|
9
|
+
* - Causal LM models: Use simple document + question format
|
|
10
|
+
*
|
|
11
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
12
|
+
*/
|
|
13
|
+
import type { SearchResult } from './types.js';
|
|
14
|
+
import type { GeneratorModelType } from './response-generator.js';
|
|
15
|
+
/**
|
|
16
|
+
* Default system prompt for instruct models
|
|
17
|
+
* Emphasizes grounded responses using only provided context
|
|
18
|
+
*/
|
|
19
|
+
export declare const DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n3. Do not make up information or use external knowledge\n4. Be concise and direct in your response\n5. If the context is incomplete or unclear, acknowledge this limitation";
|
|
20
|
+
/**
|
|
21
|
+
* Default system prompt for RAG with source attribution
|
|
22
|
+
*/
|
|
23
|
+
export declare const DEFAULT_SYSTEM_PROMPT_WITH_ATTRIBUTION = "You are a helpful assistant that answers questions based ONLY on the provided context documents. Follow these rules strictly:\n\n1. Answer ONLY using information found in the context documents\n2. When possible, mention which document the information comes from\n3. If the answer cannot be found in the context, say \"I cannot find this information in the provided documents\"\n4. Do not make up information or use external knowledge\n5. Be concise and direct in your response";
|
|
24
|
+
/**
|
|
25
|
+
* SmolLM2 chat template format
|
|
26
|
+
* Uses <|im_start|> and <|im_end|> tokens
|
|
27
|
+
*/
|
|
28
|
+
export declare const SMOLLM2_CHAT_TEMPLATE: {
|
|
29
|
+
systemStart: string;
|
|
30
|
+
systemEnd: string;
|
|
31
|
+
userStart: string;
|
|
32
|
+
userEnd: string;
|
|
33
|
+
assistantStart: string;
|
|
34
|
+
assistantEnd: string;
|
|
35
|
+
endOfText: string;
|
|
36
|
+
};
|
|
37
|
+
/**
|
|
38
|
+
* Options for formatting context chunks
|
|
39
|
+
*/
|
|
40
|
+
export interface ContextFormattingOptions {
|
|
41
|
+
/** Maximum tokens available for context */
|
|
42
|
+
maxContextTokens: number;
|
|
43
|
+
/** Include document titles/sources */
|
|
44
|
+
includeDocumentInfo?: boolean;
|
|
45
|
+
/** Include relevance scores */
|
|
46
|
+
includeScores?: boolean;
|
|
47
|
+
/** Separator between chunks */
|
|
48
|
+
chunkSeparator?: string;
|
|
49
|
+
/** Token estimation function (chars to tokens ratio) */
|
|
50
|
+
tokenEstimationRatio?: number;
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Result of context formatting
|
|
54
|
+
*/
|
|
55
|
+
export interface FormattedContext {
|
|
56
|
+
/** Formatted context string */
|
|
57
|
+
text: string;
|
|
58
|
+
/** Estimated token count */
|
|
59
|
+
estimatedTokens: number;
|
|
60
|
+
/** Number of chunks included */
|
|
61
|
+
chunksIncluded: number;
|
|
62
|
+
/** Total chunks available */
|
|
63
|
+
totalChunks: number;
|
|
64
|
+
/** Whether context was truncated */
|
|
65
|
+
truncated: boolean;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Format search result chunks into context string for the prompt
|
|
69
|
+
*
|
|
70
|
+
* @param chunks - Search result chunks to format
|
|
71
|
+
* @param options - Formatting options
|
|
72
|
+
* @returns Formatted context with metadata
|
|
73
|
+
*/
|
|
74
|
+
export declare function formatContextChunks(chunks: SearchResult[], options: ContextFormattingOptions): FormattedContext;
|
|
75
|
+
/**
|
|
76
|
+
* Options for building the complete prompt
|
|
77
|
+
*/
|
|
78
|
+
export interface PromptBuildOptions {
|
|
79
|
+
/** User's query */
|
|
80
|
+
query: string;
|
|
81
|
+
/** Search result chunks */
|
|
82
|
+
chunks: SearchResult[];
|
|
83
|
+
/** Generator model type */
|
|
84
|
+
modelType: GeneratorModelType;
|
|
85
|
+
/** Custom system prompt (optional) */
|
|
86
|
+
systemPrompt?: string;
|
|
87
|
+
/** Maximum context window tokens */
|
|
88
|
+
maxContextLength: number;
|
|
89
|
+
/** Tokens reserved for output */
|
|
90
|
+
reservedOutputTokens: number;
|
|
91
|
+
/** Include source attribution hint */
|
|
92
|
+
includeSourceAttribution?: boolean;
|
|
93
|
+
}
|
|
94
|
+
/**
|
|
95
|
+
* Result of prompt building
|
|
96
|
+
*/
|
|
97
|
+
export interface BuiltPrompt {
|
|
98
|
+
/** Complete prompt string */
|
|
99
|
+
prompt: string;
|
|
100
|
+
/** Estimated total tokens */
|
|
101
|
+
estimatedTokens: number;
|
|
102
|
+
/** Context metadata */
|
|
103
|
+
contextInfo: FormattedContext;
|
|
104
|
+
/** System prompt used (if any) */
|
|
105
|
+
systemPromptUsed?: string;
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Build a complete prompt for the generator model
|
|
109
|
+
*
|
|
110
|
+
* @param options - Prompt building options
|
|
111
|
+
* @returns Built prompt with metadata
|
|
112
|
+
*/
|
|
113
|
+
export declare function buildPrompt(options: PromptBuildOptions): BuiltPrompt;
|
|
114
|
+
/**
|
|
115
|
+
* Estimate token count for a string
|
|
116
|
+
* Uses a simple character-based heuristic (~4 chars per token for English)
|
|
117
|
+
*
|
|
118
|
+
* @param text - Text to estimate tokens for
|
|
119
|
+
* @returns Estimated token count
|
|
120
|
+
*/
|
|
121
|
+
export declare function estimateTokenCount(text: string): number;
|
|
122
|
+
/**
|
|
123
|
+
* Calculate available context budget
|
|
124
|
+
*
|
|
125
|
+
* @param maxContextLength - Maximum context window size
|
|
126
|
+
* @param reservedOutputTokens - Tokens reserved for generation
|
|
127
|
+
* @param promptOverhead - Tokens used by prompt formatting
|
|
128
|
+
* @returns Available tokens for context chunks
|
|
129
|
+
*/
|
|
130
|
+
export declare function calculateContextBudget(maxContextLength: number, reservedOutputTokens: number, promptOverhead?: number): number;
|
|
131
|
+
/**
|
|
132
|
+
* Get default stop sequences for a model type
|
|
133
|
+
*
|
|
134
|
+
* @param modelType - Generator model type
|
|
135
|
+
* @returns Array of stop sequences
|
|
136
|
+
*/
|
|
137
|
+
export declare function getDefaultStopSequences(modelType: GeneratorModelType): string[];
|
|
138
|
+
//# sourceMappingURL=prompt-templates.d.ts.map
|