rag-lite-ts 2.3.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/dist/cjs/cli/search.js +77 -2
  2. package/dist/cjs/cli.js +28 -1
  3. package/dist/cjs/core/abstract-generator.d.ts +97 -0
  4. package/dist/cjs/core/abstract-generator.js +222 -0
  5. package/dist/cjs/core/binary-index-format.js +47 -7
  6. package/dist/cjs/core/generator-registry.d.ts +114 -0
  7. package/dist/cjs/core/generator-registry.js +280 -0
  8. package/dist/cjs/core/index.d.ts +4 -0
  9. package/dist/cjs/core/index.js +11 -0
  10. package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
  11. package/dist/cjs/core/lazy-dependency-loader.js +111 -2
  12. package/dist/cjs/core/prompt-templates.d.ts +138 -0
  13. package/dist/cjs/core/prompt-templates.js +225 -0
  14. package/dist/cjs/core/response-generator.d.ts +132 -0
  15. package/dist/cjs/core/response-generator.js +69 -0
  16. package/dist/cjs/core/search.d.ts +72 -1
  17. package/dist/cjs/core/search.js +79 -6
  18. package/dist/cjs/core/types.d.ts +1 -0
  19. package/dist/cjs/core/vector-index-worker.js +10 -0
  20. package/dist/cjs/core/vector-index.js +69 -19
  21. package/dist/cjs/factories/generator-factory.d.ts +88 -0
  22. package/dist/cjs/factories/generator-factory.js +151 -0
  23. package/dist/cjs/factories/index.d.ts +1 -0
  24. package/dist/cjs/factories/index.js +5 -0
  25. package/dist/cjs/index.d.ts +9 -0
  26. package/dist/cjs/index.js +16 -0
  27. package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
  28. package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
  29. package/dist/cjs/text/generators/index.d.ts +10 -0
  30. package/dist/cjs/text/generators/index.js +10 -0
  31. package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
  32. package/dist/cjs/text/generators/instruct-generator.js +192 -0
  33. package/dist/esm/cli/search.js +77 -2
  34. package/dist/esm/cli.js +28 -1
  35. package/dist/esm/core/abstract-generator.d.ts +97 -0
  36. package/dist/esm/core/abstract-generator.js +222 -0
  37. package/dist/esm/core/binary-index-format.js +47 -7
  38. package/dist/esm/core/generator-registry.d.ts +114 -0
  39. package/dist/esm/core/generator-registry.js +280 -0
  40. package/dist/esm/core/index.d.ts +4 -0
  41. package/dist/esm/core/index.js +11 -0
  42. package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
  43. package/dist/esm/core/lazy-dependency-loader.js +111 -2
  44. package/dist/esm/core/prompt-templates.d.ts +138 -0
  45. package/dist/esm/core/prompt-templates.js +225 -0
  46. package/dist/esm/core/response-generator.d.ts +132 -0
  47. package/dist/esm/core/response-generator.js +69 -0
  48. package/dist/esm/core/search.d.ts +72 -1
  49. package/dist/esm/core/search.js +79 -6
  50. package/dist/esm/core/types.d.ts +1 -0
  51. package/dist/esm/core/vector-index-worker.js +10 -0
  52. package/dist/esm/core/vector-index.js +69 -19
  53. package/dist/esm/factories/generator-factory.d.ts +88 -0
  54. package/dist/esm/factories/generator-factory.js +151 -0
  55. package/dist/esm/factories/index.d.ts +1 -0
  56. package/dist/esm/factories/index.js +5 -0
  57. package/dist/esm/index.d.ts +9 -0
  58. package/dist/esm/index.js +16 -0
  59. package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
  60. package/dist/esm/text/generators/causal-lm-generator.js +197 -0
  61. package/dist/esm/text/generators/index.d.ts +10 -0
  62. package/dist/esm/text/generators/index.js +10 -0
  63. package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
  64. package/dist/esm/text/generators/instruct-generator.js +192 -0
  65. package/package.json +1 -1
@@ -0,0 +1,97 @@
1
+ /**
2
+ * CORE MODULE — Abstract Base Generator
3
+ *
4
+ * Provides model-agnostic base functionality for all generator implementations.
5
+ * This is an abstract base class, not a concrete implementation.
6
+ *
7
+ * ARCHITECTURAL NOTE:
8
+ * Similar to BaseUniversalEmbedder, this class provides shared infrastructure:
9
+ * - Model lifecycle management (loading, cleanup, disposal)
10
+ * - Token budget management
11
+ * - Error handling with helpful messages
12
+ * - Common utility methods
13
+ *
14
+ * IMPLEMENTATION LAYERS:
15
+ * - Text: InstructGenerator extends this class (SmolLM2-Instruct)
16
+ * - Text: CausalLMGenerator extends this class (DistilGPT2)
17
+ *
18
+ * @experimental This feature is experimental and may change in future versions.
19
+ */
20
+ import type { ResponseGenerator, GeneratorModelInfo, GeneratorModelType, GenerationRequest, GenerationResult, GeneratorCreationOptions } from './response-generator.js';
21
+ import { GenerationError } from './response-generator.js';
22
+ /**
23
+ * Abstract base class for response generators.
24
+ * Provides common functionality and lifecycle management; subclasses supply loadModel(), generateText() and cleanup().
25
+ */
26
+ export declare abstract class BaseResponseGenerator implements ResponseGenerator {
27
+ readonly modelName: string; // Model name exactly as passed to the constructor.
28
+ protected _isLoaded: boolean; // Load flag returned by isLoaded(); starts out false.
29
+ protected _modelInfo: GeneratorModelInfo; // Metadata looked up from GeneratorRegistry for modelName.
30
+ protected _options: GeneratorCreationOptions; // Creation options as supplied (empty object when omitted).
31
+ constructor(modelName: string, options?: GeneratorCreationOptions); // Throws Error when modelName is not a supported generator.
32
+ get modelType(): GeneratorModelType; // The `type` field of the model info.
33
+ get maxContextLength(): number; // capabilities.maxContextLength of the model info.
34
+ get maxOutputLength(): number; // capabilities.defaultMaxOutputTokens of the model info.
35
+ isLoaded(): boolean; // Current value of the _isLoaded flag.
36
+ getModelInfo(): GeneratorModelInfo; // Shallow copy of the model info.
37
+ /**
38
+ * Load the model - must be implemented by subclasses. generate() awaits this when the model is not yet loaded.
39
+ */
40
+ abstract loadModel(): Promise<void>;
41
+ /**
42
+ * Generate text using the model - must be implemented by subclasses
43
+ * @param prompt - The formatted prompt string (generate() passes the prompt produced by buildPrompt)
44
+ * @param options - Generation options (generate() fills these from the request, falling back to model or built-in defaults)
45
+ * @returns Generated text together with prompt/completion token counts and the finish reason
46
+ */
47
+ protected abstract generateText(prompt: string, options: {
48
+ maxTokens: number;
49
+ temperature: number;
50
+ topP: number;
51
+ topK: number;
52
+ repetitionPenalty: number;
53
+ stopSequences: string[];
54
+ }): Promise<{
55
+ text: string;
56
+ promptTokens: number;
57
+ completionTokens: number;
58
+ finishReason: 'complete' | 'length' | 'stop_sequence' | 'error';
59
+ }>;
60
+ /**
61
+ * Clean up resources - must be implemented by subclasses
62
+ */
63
+ abstract cleanup(): Promise<void>;
64
+ /**
65
+ * Generate a response based on query and retrieved chunks.
66
+ * This method orchestrates the generation pipeline: it loads the model if needed, limits the chunks, builds the prompt, calls generateText() and cleans the returned text. Failures that are not already a GenerationError are rethrown wrapped in one.
67
+ */
68
+ generate(request: GenerationRequest): Promise<GenerationResult>;
69
+ /**
70
+ * Validate that the model is loaded before operations; throws GenerationError when it is not.
71
+ */
72
+ protected ensureLoaded(): void;
73
+ /**
74
+ * Clean up response text by removing artifacts (chat-template markers and, heuristically, a trailing incomplete sentence).
75
+ */
76
+ protected cleanResponseText(text: string): string;
77
+ /**
78
+ * Log model loading progress to the console as `[modelName] stage`, followed by `: details` when details are given.
79
+ */
80
+ protected logModelLoading(stage: string, details?: string): void;
81
+ /**
82
+ * Handle model loading errors with helpful messages: returns (does not throw) a GenerationError for the 'loading' stage, with extra guidance for network- and memory-related messages.
83
+ */
84
+ protected handleLoadingError(error: Error): GenerationError;
85
+ }
86
+ /**
87
+ * Extended options for generator instances: everything in GeneratorCreationOptions plus a log level.
88
+ */
89
+ export interface GeneratorOptions extends GeneratorCreationOptions {
90
+ /** Log level for debugging; createGeneratorOptions() defaults it to 'info' */
91
+ logLevel?: 'debug' | 'info' | 'warn' | 'error' | 'silent';
92
+ }
93
+ /**
94
+ * Create generator options with defaults (timeout 60000, enableGPU false, logLevel 'info'); values supplied in `options` override them.
95
+ */
96
+ export declare function createGeneratorOptions(options?: Partial<GeneratorOptions>): GeneratorOptions;
97
+ //# sourceMappingURL=abstract-generator.d.ts.map
@@ -0,0 +1,222 @@
1
+ /**
2
+ * CORE MODULE — Abstract Base Generator
3
+ *
4
+ * Provides model-agnostic base functionality for all generator implementations.
5
+ * This is an abstract base class, not a concrete implementation.
6
+ *
7
+ * ARCHITECTURAL NOTE:
8
+ * Similar to BaseUniversalEmbedder, this class provides shared infrastructure:
9
+ * - Model lifecycle management (loading, cleanup, disposal)
10
+ * - Token budget management
11
+ * - Error handling with helpful messages
12
+ * - Common utility methods
13
+ *
14
+ * IMPLEMENTATION LAYERS:
15
+ * - Text: InstructGenerator extends this class (SmolLM2-Instruct)
16
+ * - Text: CausalLMGenerator extends this class (DistilGPT2)
17
+ *
18
+ * @experimental This feature is experimental and may change in future versions.
19
+ */
20
+ import { GenerationError } from './response-generator.js';
21
+ import { GeneratorRegistry } from './generator-registry.js';
22
+ import { buildPrompt, getDefaultStopSequences } from './prompt-templates.js';
23
+ // =============================================================================
24
+ // BASE GENERATOR ABSTRACT CLASS
25
+ // =============================================================================
26
/**
 * Abstract base class for response generators.
 *
 * Resolves the model's metadata from GeneratorRegistry at construction time and
 * implements the shared generate() pipeline. Subclasses are expected to provide
 * loadModel(), generateText(prompt, options) and cleanup(); generate() invokes
 * loadModel() and generateText() on `this`.
 */
export class BaseResponseGenerator {
    /** Model name exactly as passed to the constructor. */
    modelName;
    /** Load flag; generate() triggers loadModel() while this is false. */
    _isLoaded = false;
    /** Registry metadata for `modelName` (type, capabilities, requirements). */
    _modelInfo;
    /** Creation options as supplied; stored but not read by this base class. */
    _options;
    /**
     * @param modelName - Name of a generator known to GeneratorRegistry
     * @param options - Creation options, stored as-is
     * @throws Error when the registry has no entry for `modelName`
     */
    constructor(modelName, options = {}) {
        this.modelName = modelName;
        const modelInfo = GeneratorRegistry.getGeneratorInfo(modelName);
        if (!modelInfo) {
            throw new Error(`Generator model '${modelName}' is not supported. ` +
                `Supported models: ${GeneratorRegistry.getSupportedGenerators().join(', ')}`);
        }
        this._modelInfo = modelInfo;
        this._options = options;
    }
    // =============================================================================
    // PUBLIC INTERFACE IMPLEMENTATION
    // =============================================================================
    /** Model type taken from the model info. */
    get modelType() {
        return this._modelInfo.type;
    }
    /** Context window size reported by the model's capabilities. */
    get maxContextLength() {
        return this._modelInfo.capabilities.maxContextLength;
    }
    /** Default output-token budget reported by the model's capabilities. */
    get maxOutputLength() {
        return this._modelInfo.capabilities.defaultMaxOutputTokens;
    }
    /** @returns Whether the model is currently flagged as loaded */
    isLoaded() {
        return this._isLoaded;
    }
    /**
     * @returns A shallow copy of the model info. Top-level fields can be changed
     * safely; nested objects such as `capabilities` are still shared with this
     * instance's internal state.
     */
    getModelInfo() {
        return { ...this._modelInfo };
    }
    // =============================================================================
    // DEFAULT IMPLEMENTATION
    // =============================================================================
    /**
     * Generate a response based on query and retrieved chunks.
     *
     * Pipeline: load the model if necessary, resolve generation parameters
     * (request value, else model default, else built-in default), keep the first
     * `maxChunksForContext` chunks, build the prompt, call generateText() and
     * clean the returned text.
     *
     * @param request - Generation request (query, chunks and optional overrides)
     * @returns The cleaned response plus token, timing and context metadata
     * @throws GenerationError for any failure inside the pipeline; errors raised
     * by loadModel() propagate unchanged because loading happens before the
     * guarded section
     */
    async generate(request) {
        if (!this._isLoaded) {
            await this.loadModel();
        }
        // Timing starts after loading, so generationTimeMs excludes model load time.
        const startTime = Date.now();
        try {
            // Get generation parameters with defaults
            const maxTokens = request.maxTokens ?? this._modelInfo.capabilities.defaultMaxOutputTokens;
            const temperature = request.temperature ?? this._modelInfo.capabilities.recommendedTemperature;
            const topP = request.topP ?? 0.9;
            const topK = request.topK ?? 50;
            const repetitionPenalty = request.repetitionPenalty ?? 1.1;
            const stopSequences = request.stopSequences ?? getDefaultStopSequences(this.modelType);
            // Get max chunks for context (configurable, with model-specific default)
            const maxChunksForContext = request.maxChunksForContext ??
                this._modelInfo.capabilities.defaultMaxChunksForContext;
            // Limit chunks to maxChunksForContext (assumes chunks are already reranked)
            const totalChunks = request.chunks.length;
            const limitedChunks = request.chunks.slice(0, maxChunksForContext);
            if (totalChunks > maxChunksForContext) {
                console.log(`📊 Using top ${maxChunksForContext} of ${totalChunks} reranked chunks for generation`);
            }
            // Build the prompt with context; the output budget is reserved up front
            const builtPrompt = buildPrompt({
                query: request.query,
                chunks: limitedChunks,
                modelType: this.modelType,
                systemPrompt: request.systemPrompt,
                maxContextLength: this.maxContextLength,
                reservedOutputTokens: maxTokens,
                includeSourceAttribution: request.includeSourceAttribution
            });
            // Log context info
            if (builtPrompt.contextInfo.truncated) {
                console.warn(`⚠️ Context truncated: Only ${builtPrompt.contextInfo.chunksIncluded} of ` +
                    `${builtPrompt.contextInfo.totalChunks} chunks fit in context window`);
            }
            // Generate response
            const result = await this.generateText(builtPrompt.prompt, {
                maxTokens,
                temperature,
                topP,
                topK,
                repetitionPenalty,
                stopSequences
            });
            const generationTimeMs = Date.now() - startTime;
            // Clean up the response text
            const cleanedResponse = this.cleanResponseText(result.text);
            return {
                response: cleanedResponse,
                tokensUsed: result.promptTokens + result.completionTokens,
                truncated: builtPrompt.contextInfo.truncated,
                modelName: this.modelName,
                generationTimeMs,
                metadata: {
                    promptTokens: result.promptTokens,
                    completionTokens: result.completionTokens,
                    chunksIncluded: builtPrompt.contextInfo.chunksIncluded,
                    totalChunks: totalChunks, // Report original total, not limited
                    finishReason: result.finishReason
                }
            };
        }
        catch (error) {
            // Already in the library's error shape: pass through untouched.
            if (error instanceof GenerationError) {
                throw error;
            }
            throw new GenerationError(this.modelName, 'generation', `Generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`, error instanceof Error ? error : undefined);
        }
    }
    // =============================================================================
    // PROTECTED HELPER METHODS
    // =============================================================================
    /**
     * Validate that the model is loaded before operations.
     * @throws GenerationError when the model has not been loaded
     */
    ensureLoaded() {
        if (!this._isLoaded) {
            throw new GenerationError(this.modelName, 'generation', `Model '${this.modelName}' is not loaded. Call loadModel() first.`);
        }
    }
    /**
     * Clean up response text by removing artifacts.
     *
     * 1. Everything from the first chat-template marker onwards is dropped.
     * 2. If the remaining text does not end in a finished sentence, it is cut
     *    back to the last sentence boundary, but only when that boundary lies
     *    in the second half of the text, so short answers are never gutted.
     *
     * A sentence boundary is `.`, `!` or `?`, optionally followed by closing
     * quotes/brackets, and then whitespace. Requiring the whitespace keeps
     * decimals and dotted names ("2.1", "example.com") from being mistaken
     * for sentence ends.
     *
     * @param text - Raw text returned by the model
     * @returns The cleaned, trimmed text
     */
    cleanResponseText(text) {
        const templateMarkers = [
            '<|im_end|>',
            '<|im_start|>',
            '<|endoftext|>',
            '<|assistant|>',
            '<|user|>',
            '<|system|>'
        ];
        let cleaned = text.trim();
        for (const marker of templateMarkers) {
            cleaned = cleaned.split(marker)[0];
        }
        // Cutting at a marker can expose trailing whitespace; drop it so the
        // end-of-text check looks at the last real character.
        cleaned = cleaned.trimEnd();
        // Terminal punctuation may be followed by closing quotes or brackets.
        const endsWithSentence = /[.!?]["'”’)\]]*$/.test(cleaned);
        if (cleaned.length > 0 && !endsWithSentence) {
            const boundary = /[.!?]["'”’)\]]*(?=\s)/g;
            let cutAt = -1;
            // Walk all boundaries; lastIndex after the final match is the cut position.
            while (boundary.exec(cleaned) !== null) {
                cutAt = boundary.lastIndex;
            }
            if (cutAt - 1 > cleaned.length * 0.5) {
                cleaned = cleaned.substring(0, cutAt);
            }
        }
        return cleaned.trim();
    }
    /**
     * Log model loading progress to the console.
     * @param stage - Short label for the current loading stage
     * @param details - Optional extra text appended after a colon
     */
    logModelLoading(stage, details) {
        const message = `[${this.modelName}] ${stage}`;
        if (details) {
            console.log(`${message}: ${details}`);
        }
        else {
            console.log(message);
        }
    }
    /**
     * Handle model loading errors with helpful messages.
     *
     * Builds (and returns, rather than throws) a GenerationError for the
     * 'loading' stage. Messages that look network- or memory-related get
     * additional guidance. Keyword checks ignore case, except for the
     * upper-case token `OOM`.
     *
     * @param error - The value caught while loading; normally an Error, but any
     * thrown value is accepted
     * @returns A GenerationError describing the failure
     */
    handleLoadingError(error) {
        // A catch clause can receive anything; never assume `.message` exists.
        const hasMessage = error != null && typeof error.message === 'string';
        const reason = hasMessage ? error.message : String(error);
        const cause = hasMessage ? error : new Error(reason);
        const baseMessage = `Failed to load generator model '${this.modelName}': ${reason}`;
        const lowered = reason.toLowerCase();
        // Provide specific guidance based on error type
        if (lowered.includes('network') || lowered.includes('fetch')) {
            return new GenerationError(this.modelName, 'loading', `${baseMessage}\n` +
                `This appears to be a network error. Please check your internet connection ` +
                `and ensure the model repository is accessible.`, cause);
        }
        if (lowered.includes('memory') || reason.includes('OOM')) {
            return new GenerationError(this.modelName, 'loading', `${baseMessage}\n` +
                `This appears to be a memory error. The model requires ` +
                `${this._modelInfo.requirements.minimumMemory}MB. Try closing other applications ` +
                `or using a smaller model like 'Xenova/distilgpt2'.`, cause);
        }
        return new GenerationError(this.modelName, 'loading', baseMessage, cause);
    }
}
211
/**
 * Build a generator options object, filling in defaults for anything not supplied.
 *
 * Defaults: `timeout` 60000 (60 seconds), `enableGPU` false, `logLevel` 'info'.
 * Every own property of `options` is copied over the defaults, so a key that is
 * present overrides its default even when its value is `undefined`.
 *
 * @param options - Partial options to overlay on the defaults
 * @returns A new options object
 */
export function createGeneratorOptions(options = {}) {
    const defaults = {
        timeout: 60000, // 60 seconds
        enableGPU: false,
        logLevel: 'info'
    };
    return Object.assign(defaults, options);
}
222
+ //# sourceMappingURL=abstract-generator.js.map
@@ -190,6 +190,9 @@ export class BinaryIndexFormat {
190
190
  const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
191
191
  let offset = 0;
192
192
  // Read basic header (24 bytes, all little-endian)
193
+ if (buffer.byteLength < 24) {
194
+ throw new Error(`Index file too small: expected at least 24 bytes, got ${buffer.byteLength}`);
195
+ }
193
196
  const dimensions = view.getUint32(offset, true);
194
197
  offset += 4;
195
198
  const maxElements = view.getUint32(offset, true);
@@ -202,10 +205,20 @@ export class BinaryIndexFormat {
202
205
  offset += 4;
203
206
  const currentSize = view.getUint32(offset, true);
204
207
  offset += 4;
205
- // Check if this is the extended grouped format (40+ bytes header)
206
- const hasGroups = buffer.byteLength >= 40 ? view.getUint32(offset, true) : 0;
207
- if (hasGroups === 1 && buffer.byteLength >= 40) {
208
- // Load grouped format
208
+ // Calculate expected size for original format
209
+ const vectorSize = 4 + (dimensions * 4); // id + vector
210
+ const expectedOriginalSize = 24 + (currentSize * vectorSize);
211
+ // Check if this is the extended grouped format (44 bytes header)
212
+ // Extended header has: 24 bytes basic + 4 bytes hasGroups + 16 bytes for offsets/counts = 44 bytes
213
+ // Only check for grouped format if file is larger than expected original format size
214
+ const hasGroups = buffer.byteLength > expectedOriginalSize && buffer.byteLength >= 44 && offset + 4 <= buffer.byteLength
215
+ ? view.getUint32(offset, true)
216
+ : 0;
217
+ if (hasGroups === 1 && buffer.byteLength >= 44) {
218
+ // Load grouped format - ensure we have enough bytes for extended header
219
+ if (offset + 20 > buffer.byteLength) {
220
+ throw new Error(`Index file too small for grouped format: expected at least ${offset + 20} bytes, got ${buffer.byteLength}`);
221
+ }
209
222
  const textOffset = view.getUint32(offset + 4, true);
210
223
  const textCount = view.getUint32(offset + 8, true);
211
224
  const imageOffset = view.getUint32(offset + 12, true);
@@ -218,14 +231,23 @@ export class BinaryIndexFormat {
218
231
  if (offset % 4 !== 0) {
219
232
  throw new Error(`Offset ${offset} is not 4-byte aligned`);
220
233
  }
234
+ // Check bounds before reading vector ID
235
+ if (offset + 4 > buffer.byteLength) {
236
+ throw new Error(`Text vector ID at offset ${offset} is outside the bounds of the DataView (buffer size: ${buffer.byteLength})`);
237
+ }
221
238
  // Read vector ID
222
239
  const id = view.getUint32(offset, true);
223
240
  offset += 4;
241
+ // Check bounds before reading vector data
242
+ const vectorDataSize = dimensions * 4;
243
+ if (offset + vectorDataSize > buffer.byteLength) {
244
+ throw new Error(`Text vector data at offset ${offset} would exceed buffer bounds (buffer size: ${buffer.byteLength}, required: ${offset + vectorDataSize})`);
245
+ }
224
246
  // Zero-copy Float32Array view
225
247
  const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
226
248
  // Copy to avoid buffer lifecycle issues
227
249
  const vector = new Float32Array(vectorView);
228
- offset += dimensions * 4;
250
+ offset += vectorDataSize;
229
251
  textVectors.push({ id, vector });
230
252
  }
231
253
  // Load image vectors
@@ -236,14 +258,23 @@ export class BinaryIndexFormat {
236
258
  if (offset % 4 !== 0) {
237
259
  throw new Error(`Offset ${offset} is not 4-byte aligned`);
238
260
  }
261
+ // Check bounds before reading vector ID
262
+ if (offset + 4 > buffer.byteLength) {
263
+ throw new Error(`Image vector ID at offset ${offset} is outside the bounds of the DataView (buffer size: ${buffer.byteLength})`);
264
+ }
239
265
  // Read vector ID
240
266
  const id = view.getUint32(offset, true);
241
267
  offset += 4;
268
+ // Check bounds before reading vector data
269
+ const vectorDataSize = dimensions * 4;
270
+ if (offset + vectorDataSize > buffer.byteLength) {
271
+ throw new Error(`Image vector data at offset ${offset} would exceed buffer bounds (buffer size: ${buffer.byteLength}, required: ${offset + vectorDataSize})`);
272
+ }
242
273
  // Zero-copy Float32Array view
243
274
  const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
244
275
  // Copy to avoid buffer lifecycle issues
245
276
  const vector = new Float32Array(vectorView);
246
- offset += dimensions * 4;
277
+ offset += vectorDataSize;
247
278
  imageVectors.push({ id, vector });
248
279
  }
249
280
  // Combine all vectors for backward compatibility
@@ -269,14 +300,23 @@ export class BinaryIndexFormat {
269
300
  if (offset % 4 !== 0) {
270
301
  throw new Error(`Offset ${offset} is not 4-byte aligned`);
271
302
  }
303
+ // Check bounds before reading vector ID
304
+ if (offset + 4 > buffer.byteLength) {
305
+ throw new Error(`Offset ${offset} is outside the bounds of the DataView (buffer size: ${buffer.byteLength})`);
306
+ }
272
307
  // Read vector ID
273
308
  const id = view.getUint32(offset, true);
274
309
  offset += 4;
310
+ // Check bounds before reading vector data
311
+ const vectorDataSize = dimensions * 4;
312
+ if (offset + vectorDataSize > buffer.byteLength) {
313
+ throw new Error(`Vector data at offset ${offset} would exceed buffer bounds (buffer size: ${buffer.byteLength}, required: ${offset + vectorDataSize})`);
314
+ }
275
315
  // Zero-copy Float32Array view (fast!)
276
316
  const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
277
317
  // Copy to avoid buffer lifecycle issues
278
318
  const vector = new Float32Array(vectorView);
279
- offset += dimensions * 4;
319
+ offset += vectorDataSize;
280
320
  vectors.push({ id, vector });
281
321
  }
282
322
  return {
@@ -0,0 +1,114 @@
1
+ /**
2
+ * CORE MODULE — Generator Registry for RAG Response Generation
3
+ *
4
+ * Centralized registry of supported generator models with validation and
5
+ * compatibility checking. Follows the same patterns as model-registry.ts.
6
+ *
7
+ * SUPPORTED MODELS:
8
+ * - HuggingFaceTB/SmolLM2-135M-Instruct: Balanced instruct model (DEFAULT, 3 chunks)
9
+ * - HuggingFaceTB/SmolLM2-360M-Instruct: Higher quality instruct model (5 chunks)
10
+ *
11
+ * PREREQUISITES:
12
+ * - Reranking must be enabled for response generation to ensure quality context
13
+ *
14
+ * @experimental This feature is experimental and may change in future versions.
15
+ */
16
+ import type { GeneratorModelInfo, GeneratorModelType, GeneratorValidationResult, GeneratorCapabilities } from './response-generator.js';
17
+ /**
18
+ * Registry of supported generator models with their metadata and capabilities (string-keyed; presumably by model name - confirm against the implementation)
19
+ */
20
+ export declare const SUPPORTED_GENERATORS: Record<string, GeneratorModelInfo>;
21
+ /** Default generator model name (the SmolLM2 135M instruct model) */
22
+ export declare const DEFAULT_GENERATOR_MODEL = "HuggingFaceTB/SmolLM2-135M-Instruct";
23
+ /**
24
+ * Generator registry class providing validation and model information services. Every member is static, so the class is used without being instantiated.
25
+ */
26
+ export declare class GeneratorRegistry {
27
+ /**
28
+ * Gets generator model information for a given model name
29
+ * @param modelName - Name of the generator model
30
+ * @returns Generator model information or null if not supported (BaseResponseGenerator's constructor throws on a falsy result)
31
+ */
32
+ static getGeneratorInfo(modelName: string): GeneratorModelInfo | null;
33
+ /**
34
+ * Validates a generator model name and returns compatibility information
35
+ * @param modelName - Name of the model to validate
36
+ * @returns Validation result with errors, warnings, and suggestions
37
+ */
38
+ static validateGenerator(modelName: string): GeneratorValidationResult;
39
+ /**
40
+ * Lists all supported generator models
41
+ * @param modelType - Optional filter by model type
42
+ * @returns Array of supported generator model names (BaseResponseGenerator joins this list into its "not supported" error message)
43
+ */
44
+ static getSupportedGenerators(modelType?: GeneratorModelType): string[];
45
+ /**
46
+ * Gets the default generator model name
47
+ * @returns Default generator model name (presumably DEFAULT_GENERATOR_MODEL - confirm against the implementation)
48
+ */
49
+ static getDefaultGenerator(): string;
50
+ /**
51
+ * Gets generators by type. NOTE(review): looks equivalent to getSupportedGenerators(type) - confirm whether both are needed.
52
+ * @param type - Generator type ('causal-lm' or 'instruct')
53
+ * @returns Array of model names matching the type
54
+ */
55
+ static getGeneratorsByType(type: GeneratorModelType): string[];
56
+ /**
57
+ * Checks if a generator model supports a specific capability
58
+ * @param modelName - Name of the model
59
+ * @param capability - Capability to check (any key of GeneratorCapabilities)
60
+ * @returns True if the model supports the capability
61
+ */
62
+ static supportsCapability(modelName: string, capability: keyof GeneratorCapabilities): boolean;
63
+ /**
64
+ * Gets generators similar to the given model name (for suggestions)
65
+ * @private
66
+ */
67
+ private static getSimilarGenerators;
68
+ /**
69
+ * Validates system compatibility for a generator model
70
+ * @param modelName - Name of the model
71
+ * @param systemCapabilities - System capabilities to check against; all fields are optional (availableMemory is presumably in MB, like the model's minimumMemory requirement - TODO confirm)
72
+ * @returns Validation result with compatibility information
73
+ */
74
+ static validateSystemCompatibility(modelName: string, systemCapabilities: {
75
+ availableMemory?: number;
76
+ platform?: string;
77
+ transformersJsVersion?: string;
78
+ }): GeneratorValidationResult;
79
+ }
80
+ /**
81
+ * Gets the generator type for a given model name
82
+ * @param modelName - Name of the model
83
+ * @returns Generator type or null if model not supported
84
+ */
85
+ export declare function getGeneratorType(modelName: string): GeneratorModelType | null;
86
+ /**
87
+ * Checks if a model is an instruction-tuned model
88
+ * @param modelName - Name of the model
89
+ * @returns True if the model is instruction-tuned (unsupported names presumably yield false, since no null is returned - confirm)
90
+ */
91
+ export declare function isInstructionTunedModel(modelName: string): boolean;
92
+ /**
93
+ * Gets the maximum context length for a generator model
94
+ * @param modelName - Name of the model
95
+ * @returns Maximum context length (presumably in tokens - confirm) or null if model not supported
96
+ */
97
+ export declare function getMaxContextLength(modelName: string): number | null;
98
+ /**
99
+ * Gets recommended generation settings for a model
100
+ * @param modelName - Name of the model
101
+ * @returns Recommended settings (temperature, maxTokens, maxChunksForContext) or null if model not supported
102
+ */
103
+ export declare function getRecommendedSettings(modelName: string): {
104
+ temperature: number;
105
+ maxTokens: number;
106
+ maxChunksForContext: number;
107
+ } | null;
108
+ /**
109
+ * Gets the default maximum chunks for context for a generator model
110
+ * @param modelName - Name of the model
111
+ * @returns Default max chunks for context or null if model not supported
112
+ */
113
+ export declare function getDefaultMaxChunksForContext(modelName: string): number | null;
114
+ //# sourceMappingURL=generator-registry.d.ts.map