rag-lite-ts 2.3.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/cli/search.js +77 -2
- package/dist/cjs/cli.js +28 -1
- package/dist/cjs/core/abstract-generator.d.ts +97 -0
- package/dist/cjs/core/abstract-generator.js +222 -0
- package/dist/cjs/core/binary-index-format.js +47 -7
- package/dist/cjs/core/generator-registry.d.ts +114 -0
- package/dist/cjs/core/generator-registry.js +280 -0
- package/dist/cjs/core/index.d.ts +4 -0
- package/dist/cjs/core/index.js +11 -0
- package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/cjs/core/lazy-dependency-loader.js +111 -2
- package/dist/cjs/core/prompt-templates.d.ts +138 -0
- package/dist/cjs/core/prompt-templates.js +225 -0
- package/dist/cjs/core/response-generator.d.ts +132 -0
- package/dist/cjs/core/response-generator.js +69 -0
- package/dist/cjs/core/search.d.ts +72 -1
- package/dist/cjs/core/search.js +79 -6
- package/dist/cjs/core/types.d.ts +1 -0
- package/dist/cjs/core/vector-index-worker.js +10 -0
- package/dist/cjs/core/vector-index.js +69 -19
- package/dist/cjs/factories/generator-factory.d.ts +88 -0
- package/dist/cjs/factories/generator-factory.js +151 -0
- package/dist/cjs/factories/index.d.ts +1 -0
- package/dist/cjs/factories/index.js +5 -0
- package/dist/cjs/index.d.ts +9 -0
- package/dist/cjs/index.js +16 -0
- package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
- package/dist/cjs/text/generators/index.d.ts +10 -0
- package/dist/cjs/text/generators/index.js +10 -0
- package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
- package/dist/cjs/text/generators/instruct-generator.js +192 -0
- package/dist/esm/cli/search.js +77 -2
- package/dist/esm/cli.js +28 -1
- package/dist/esm/core/abstract-generator.d.ts +97 -0
- package/dist/esm/core/abstract-generator.js +222 -0
- package/dist/esm/core/binary-index-format.js +47 -7
- package/dist/esm/core/generator-registry.d.ts +114 -0
- package/dist/esm/core/generator-registry.js +280 -0
- package/dist/esm/core/index.d.ts +4 -0
- package/dist/esm/core/index.js +11 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/esm/core/lazy-dependency-loader.js +111 -2
- package/dist/esm/core/prompt-templates.d.ts +138 -0
- package/dist/esm/core/prompt-templates.js +225 -0
- package/dist/esm/core/response-generator.d.ts +132 -0
- package/dist/esm/core/response-generator.js +69 -0
- package/dist/esm/core/search.d.ts +72 -1
- package/dist/esm/core/search.js +79 -6
- package/dist/esm/core/types.d.ts +1 -0
- package/dist/esm/core/vector-index-worker.js +10 -0
- package/dist/esm/core/vector-index.js +69 -19
- package/dist/esm/factories/generator-factory.d.ts +88 -0
- package/dist/esm/factories/generator-factory.js +151 -0
- package/dist/esm/factories/index.d.ts +1 -0
- package/dist/esm/factories/index.js +5 -0
- package/dist/esm/index.d.ts +9 -0
- package/dist/esm/index.js +16 -0
- package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/esm/text/generators/causal-lm-generator.js +197 -0
- package/dist/esm/text/generators/index.d.ts +10 -0
- package/dist/esm/text/generators/index.js +10 -0
- package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
- package/dist/esm/text/generators/instruct-generator.js +192 -0
- package/package.json +1 -1
package/dist/cjs/cli/search.js
CHANGED
|
@@ -132,7 +132,7 @@ export async function runSearch(query, options = {}) {
|
|
|
132
132
|
process.exit(EXIT_CODES.MODEL_ERROR);
|
|
133
133
|
}
|
|
134
134
|
}
|
|
135
|
-
// Prepare search options
|
|
135
|
+
// Prepare search options (with generation support)
|
|
136
136
|
const searchOptions = {};
|
|
137
137
|
if (options['top-k'] !== undefined) {
|
|
138
138
|
searchOptions.top_k = options['top-k'];
|
|
@@ -164,9 +164,60 @@ export async function runSearch(query, options = {}) {
|
|
|
164
164
|
}
|
|
165
165
|
// Track whether reranking will actually be used in this search
|
|
166
166
|
const rerankingUsed = searchOptions.rerank === true;
|
|
167
|
+
// Handle generation options (experimental, text mode only)
|
|
168
|
+
const generateResponse = options.generate === true;
|
|
169
|
+
const generatorModel = options.generator;
|
|
170
|
+
const maxGenerationTokens = options['max-tokens'];
|
|
171
|
+
const generationTemperature = options.temperature;
|
|
172
|
+
const maxChunksForContext = options['max-chunks'];
|
|
173
|
+
// Generation only supported in text mode
|
|
174
|
+
if (generateResponse && isImage) {
|
|
175
|
+
console.warn('⚠️ [EXPERIMENTAL] Generation is only supported for text searches.');
|
|
176
|
+
console.warn(' Image search results will be returned without generation.');
|
|
177
|
+
console.warn('');
|
|
178
|
+
}
|
|
179
|
+
// Generation requires reranking - enable it automatically
|
|
180
|
+
let rerankingEnabledForGeneration = false;
|
|
181
|
+
if (generateResponse && !isImage && !searchOptions.rerank) {
|
|
182
|
+
searchOptions.rerank = true;
|
|
183
|
+
rerankingEnabledForGeneration = true;
|
|
184
|
+
console.log('📋 Reranking automatically enabled (required for generation)');
|
|
185
|
+
}
|
|
186
|
+
// Set up generator if generation is requested (text mode only)
|
|
187
|
+
let generateFn;
|
|
188
|
+
if (generateResponse && !isImage) {
|
|
189
|
+
try {
|
|
190
|
+
console.log('🤖 [EXPERIMENTAL] Initializing response generator...');
|
|
191
|
+
const { createGenerateFunctionFromModel, getDefaultGeneratorModel } = await import('../factories/generator-factory.js');
|
|
192
|
+
const { getDefaultMaxChunksForContext } = await import('../core/generator-registry.js');
|
|
193
|
+
const modelToUse = generatorModel || getDefaultGeneratorModel();
|
|
194
|
+
const defaultChunks = getDefaultMaxChunksForContext(modelToUse) || 3;
|
|
195
|
+
console.log(` Model: ${modelToUse}`);
|
|
196
|
+
console.log(` Max chunks for context: ${maxChunksForContext || defaultChunks} (default: ${defaultChunks})`);
|
|
197
|
+
generateFn = await createGenerateFunctionFromModel(modelToUse);
|
|
198
|
+
searchEngine.setGenerateFunction(generateFn);
|
|
199
|
+
console.log('✅ Generator initialized');
|
|
200
|
+
console.log('');
|
|
201
|
+
}
|
|
202
|
+
catch (error) {
|
|
203
|
+
console.error('❌ [EXPERIMENTAL] Failed to initialize generator:', error instanceof Error ? error.message : 'Unknown error');
|
|
204
|
+
console.error(' Continuing without generation...');
|
|
205
|
+
console.error('');
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
// Set generation options if generator is ready
|
|
209
|
+
if (generateFn && generateResponse && !isImage) {
|
|
210
|
+
searchOptions.generateResponse = true;
|
|
211
|
+
searchOptions.generationOptions = {
|
|
212
|
+
maxTokens: maxGenerationTokens,
|
|
213
|
+
temperature: generationTemperature,
|
|
214
|
+
maxChunksForContext: maxChunksForContext
|
|
215
|
+
};
|
|
216
|
+
}
|
|
167
217
|
// Perform search
|
|
168
218
|
const startTime = Date.now();
|
|
169
219
|
let results;
|
|
220
|
+
let generationResult;
|
|
170
221
|
if (isImage && embedder) {
|
|
171
222
|
// Image-based search: embed the image and search with the vector
|
|
172
223
|
console.log('Embedding image...');
|
|
@@ -174,8 +225,14 @@ export async function runSearch(query, options = {}) {
|
|
|
174
225
|
console.log('Searching with image embedding...');
|
|
175
226
|
results = await searchEngine.searchWithVector(imageEmbedding.vector, searchOptions);
|
|
176
227
|
}
|
|
228
|
+
else if (generateResponse && generateFn) {
|
|
229
|
+
// Text-based search with generation
|
|
230
|
+
const searchResult = await searchEngine.searchWithGeneration(query, searchOptions);
|
|
231
|
+
results = searchResult.results;
|
|
232
|
+
generationResult = searchResult.generation;
|
|
233
|
+
}
|
|
177
234
|
else {
|
|
178
|
-
//
|
|
235
|
+
// Standard text-based search
|
|
179
236
|
results = await searchEngine.search(query, searchOptions);
|
|
180
237
|
}
|
|
181
238
|
const searchTime = Date.now() - startTime;
|
|
@@ -216,6 +273,21 @@ export async function runSearch(query, options = {}) {
|
|
|
216
273
|
}
|
|
217
274
|
console.log('');
|
|
218
275
|
});
|
|
276
|
+
// Display generated response if available (experimental)
|
|
277
|
+
if (generationResult) {
|
|
278
|
+
console.log('─'.repeat(50));
|
|
279
|
+
console.log('🤖 Generated Response [EXPERIMENTAL]');
|
|
280
|
+
console.log(`Model: ${generationResult.modelUsed}`);
|
|
281
|
+
console.log('─'.repeat(50));
|
|
282
|
+
console.log('');
|
|
283
|
+
console.log(generationResult.response);
|
|
284
|
+
console.log('');
|
|
285
|
+
console.log('─'.repeat(50));
|
|
286
|
+
console.log(`⏱️ Generation: ${(generationResult.generationTimeMs / 1000).toFixed(1)}s | ` +
|
|
287
|
+
`📊 ${generationResult.tokensUsed} tokens | ` +
|
|
288
|
+
`📄 ${generationResult.chunksUsedForContext} chunks used` +
|
|
289
|
+
(generationResult.truncated ? ' (context truncated)' : ''));
|
|
290
|
+
}
|
|
219
291
|
// Show search statistics
|
|
220
292
|
const stats = await searchEngine.getStats();
|
|
221
293
|
console.log('─'.repeat(50));
|
|
@@ -233,6 +305,9 @@ export async function runSearch(query, options = {}) {
|
|
|
233
305
|
else {
|
|
234
306
|
console.log('Reranking: disabled');
|
|
235
307
|
}
|
|
308
|
+
if (generationResult) {
|
|
309
|
+
console.log('Generation: enabled [EXPERIMENTAL]');
|
|
310
|
+
}
|
|
236
311
|
}
|
|
237
312
|
}
|
|
238
313
|
finally {
|
package/dist/cjs/cli.js
CHANGED
|
@@ -54,6 +54,7 @@ Examples:
|
|
|
54
54
|
raglite search "red car" --content-type image # Search only image results
|
|
55
55
|
raglite search ./photo.jpg # Search with image (multimodal mode only)
|
|
56
56
|
raglite search ./image.png --top-k 5 # Find similar images
|
|
57
|
+
raglite search "How does auth work?" --generate # [EXPERIMENTAL] Generate AI response
|
|
57
58
|
raglite ui # Launch web interface
|
|
58
59
|
|
|
59
60
|
raglite rebuild # Rebuild the entire index
|
|
@@ -64,6 +65,13 @@ Options for search:
|
|
|
64
65
|
--no-rerank Disable reranking
|
|
65
66
|
--content-type <type> Filter results by content type: 'text', 'image', or 'all' (default: all)
|
|
66
67
|
|
|
68
|
+
[EXPERIMENTAL] AI Response Generation (text mode only):
|
|
69
|
+
--generate Generate an AI response from search results
|
|
70
|
+
--generator <model> Generator model to use (default: SmolLM2-135M-Instruct)
|
|
71
|
+
--max-tokens <n> Maximum tokens to generate (default: 512)
|
|
72
|
+
--temperature <n> Sampling temperature 0-1 (default: 0.1)
|
|
73
|
+
--max-chunks <n> Maximum chunks for context (default: 3 for 135M, 5 for 360M)
|
|
74
|
+
|
|
67
75
|
Options for ingest:
|
|
68
76
|
--model <name> Use specific embedding model
|
|
69
77
|
--mode <mode> Processing mode: 'text' (default) or 'multimodal'
|
|
@@ -83,6 +91,12 @@ Available reranking strategies (multimodal mode):
|
|
|
83
91
|
text-derived Use image-to-text conversion + cross-encoder (default)
|
|
84
92
|
disabled No reranking, use vector similarity only
|
|
85
93
|
|
|
94
|
+
[EXPERIMENTAL] Available generator models:
|
|
95
|
+
HuggingFaceTB/SmolLM2-135M-Instruct (balanced, recommended default, uses top 3 chunks)
|
|
96
|
+
HuggingFaceTB/SmolLM2-360M-Instruct (higher quality, slower, uses top 5 chunks)
|
|
97
|
+
|
|
98
|
+
Note: Generation requires reranking (--rerank is automatically enabled with --generate)
|
|
99
|
+
|
|
86
100
|
For more information, visit: https://github.com/your-repo/rag-lite-ts
|
|
87
101
|
`);
|
|
88
102
|
}
|
|
@@ -126,6 +140,10 @@ function parseArgs() {
|
|
|
126
140
|
else if (optionName === 'force-rebuild') {
|
|
127
141
|
options.forceRebuild = true;
|
|
128
142
|
}
|
|
143
|
+
else if (optionName === 'generate') {
|
|
144
|
+
// Handle --generate flag for experimental response generation
|
|
145
|
+
options.generate = true;
|
|
146
|
+
}
|
|
129
147
|
else if (optionName === 'help') {
|
|
130
148
|
return { command: 'help', args: [], options: {} };
|
|
131
149
|
}
|
|
@@ -136,7 +154,16 @@ function parseArgs() {
|
|
|
136
154
|
// Handle options with values
|
|
137
155
|
const nextArg = args[i + 1];
|
|
138
156
|
if (nextArg && !nextArg.startsWith('--')) {
|
|
139
|
-
|
|
157
|
+
// Parse numeric values for specific options
|
|
158
|
+
if (optionName === 'max-tokens' || optionName === 'top-k' || optionName === 'max-chunks') {
|
|
159
|
+
options[optionName] = parseInt(nextArg, 10);
|
|
160
|
+
}
|
|
161
|
+
else if (optionName === 'temperature') {
|
|
162
|
+
options[optionName] = parseFloat(nextArg);
|
|
163
|
+
}
|
|
164
|
+
else {
|
|
165
|
+
options[optionName] = nextArg;
|
|
166
|
+
}
|
|
140
167
|
i++; // Skip the next argument as it's the value
|
|
141
168
|
}
|
|
142
169
|
else {
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Abstract Base Generator
|
|
3
|
+
*
|
|
4
|
+
* Provides model-agnostic base functionality for all generator implementations.
|
|
5
|
+
* This is an abstract base class, not a concrete implementation.
|
|
6
|
+
*
|
|
7
|
+
* ARCHITECTURAL NOTE:
|
|
8
|
+
* Similar to BaseUniversalEmbedder, this class provides shared infrastructure:
|
|
9
|
+
* - Model lifecycle management (loading, cleanup, disposal)
|
|
10
|
+
* - Token budget management
|
|
11
|
+
* - Error handling with helpful messages
|
|
12
|
+
* - Common utility methods
|
|
13
|
+
*
|
|
14
|
+
* IMPLEMENTATION LAYERS:
|
|
15
|
+
* - Text: InstructGenerator extends this class (SmolLM2-Instruct)
|
|
16
|
+
* - Text: CausalLMGenerator extends this class (DistilGPT2)
|
|
17
|
+
*
|
|
18
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
19
|
+
*/
|
|
20
|
+
import type { ResponseGenerator, GeneratorModelInfo, GeneratorModelType, GenerationRequest, GenerationResult, GeneratorCreationOptions } from './response-generator.js';
|
|
21
|
+
import { GenerationError } from './response-generator.js';
|
|
22
|
+
/**
|
|
23
|
+
* Abstract base class for response generators
|
|
24
|
+
* Provides common functionality and lifecycle management
|
|
25
|
+
*/
|
|
26
|
+
export declare abstract class BaseResponseGenerator implements ResponseGenerator {
|
|
27
|
+
readonly modelName: string;
|
|
28
|
+
protected _isLoaded: boolean;
|
|
29
|
+
protected _modelInfo: GeneratorModelInfo;
|
|
30
|
+
protected _options: GeneratorCreationOptions;
|
|
31
|
+
constructor(modelName: string, options?: GeneratorCreationOptions);
|
|
32
|
+
get modelType(): GeneratorModelType;
|
|
33
|
+
get maxContextLength(): number;
|
|
34
|
+
get maxOutputLength(): number;
|
|
35
|
+
isLoaded(): boolean;
|
|
36
|
+
getModelInfo(): GeneratorModelInfo;
|
|
37
|
+
/**
|
|
38
|
+
* Load the model - must be implemented by subclasses
|
|
39
|
+
*/
|
|
40
|
+
abstract loadModel(): Promise<void>;
|
|
41
|
+
/**
|
|
42
|
+
* Generate text using the model - must be implemented by subclasses
|
|
43
|
+
* @param prompt - The formatted prompt string
|
|
44
|
+
* @param options - Generation options
|
|
45
|
+
* @returns Generated text
|
|
46
|
+
*/
|
|
47
|
+
protected abstract generateText(prompt: string, options: {
|
|
48
|
+
maxTokens: number;
|
|
49
|
+
temperature: number;
|
|
50
|
+
topP: number;
|
|
51
|
+
topK: number;
|
|
52
|
+
repetitionPenalty: number;
|
|
53
|
+
stopSequences: string[];
|
|
54
|
+
}): Promise<{
|
|
55
|
+
text: string;
|
|
56
|
+
promptTokens: number;
|
|
57
|
+
completionTokens: number;
|
|
58
|
+
finishReason: 'complete' | 'length' | 'stop_sequence' | 'error';
|
|
59
|
+
}>;
|
|
60
|
+
/**
|
|
61
|
+
* Clean up resources - must be implemented by subclasses
|
|
62
|
+
*/
|
|
63
|
+
abstract cleanup(): Promise<void>;
|
|
64
|
+
/**
|
|
65
|
+
* Generate a response based on query and retrieved chunks
|
|
66
|
+
* This method orchestrates the generation pipeline
|
|
67
|
+
*/
|
|
68
|
+
generate(request: GenerationRequest): Promise<GenerationResult>;
|
|
69
|
+
/**
|
|
70
|
+
* Validate that the model is loaded before operations
|
|
71
|
+
*/
|
|
72
|
+
protected ensureLoaded(): void;
|
|
73
|
+
/**
|
|
74
|
+
* Clean up response text by removing artifacts
|
|
75
|
+
*/
|
|
76
|
+
protected cleanResponseText(text: string): string;
|
|
77
|
+
/**
|
|
78
|
+
* Log model loading progress
|
|
79
|
+
*/
|
|
80
|
+
protected logModelLoading(stage: string, details?: string): void;
|
|
81
|
+
/**
|
|
82
|
+
* Handle model loading errors with helpful messages
|
|
83
|
+
*/
|
|
84
|
+
protected handleLoadingError(error: Error): GenerationError;
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* Extended options for generator instances
|
|
88
|
+
*/
|
|
89
|
+
export interface GeneratorOptions extends GeneratorCreationOptions {
|
|
90
|
+
/** Log level for debugging */
|
|
91
|
+
logLevel?: 'debug' | 'info' | 'warn' | 'error' | 'silent';
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Create generator options with defaults
|
|
95
|
+
*/
|
|
96
|
+
export declare function createGeneratorOptions(options?: Partial<GeneratorOptions>): GeneratorOptions;
|
|
97
|
+
//# sourceMappingURL=abstract-generator.d.ts.map
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Abstract Base Generator
|
|
3
|
+
*
|
|
4
|
+
* Provides model-agnostic base functionality for all generator implementations.
|
|
5
|
+
* This is an abstract base class, not a concrete implementation.
|
|
6
|
+
*
|
|
7
|
+
* ARCHITECTURAL NOTE:
|
|
8
|
+
* Similar to BaseUniversalEmbedder, this class provides shared infrastructure:
|
|
9
|
+
* - Model lifecycle management (loading, cleanup, disposal)
|
|
10
|
+
* - Token budget management
|
|
11
|
+
* - Error handling with helpful messages
|
|
12
|
+
* - Common utility methods
|
|
13
|
+
*
|
|
14
|
+
* IMPLEMENTATION LAYERS:
|
|
15
|
+
* - Text: InstructGenerator extends this class (SmolLM2-Instruct)
|
|
16
|
+
* - Text: CausalLMGenerator extends this class (DistilGPT2)
|
|
17
|
+
*
|
|
18
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
19
|
+
*/
|
|
20
|
+
import { GenerationError } from './response-generator.js';
|
|
21
|
+
import { GeneratorRegistry } from './generator-registry.js';
|
|
22
|
+
import { buildPrompt, getDefaultStopSequences } from './prompt-templates.js';
|
|
23
|
+
// =============================================================================
|
|
24
|
+
// BASE GENERATOR ABSTRACT CLASS
|
|
25
|
+
// =============================================================================
|
|
26
|
+
/**
|
|
27
|
+
* Abstract base class for response generators
|
|
28
|
+
* Provides common functionality and lifecycle management
|
|
29
|
+
*/
|
|
30
|
+
export class BaseResponseGenerator {
|
|
31
|
+
modelName;
|
|
32
|
+
_isLoaded = false;
|
|
33
|
+
_modelInfo;
|
|
34
|
+
_options;
|
|
35
|
+
constructor(modelName, options = {}) {
|
|
36
|
+
this.modelName = modelName;
|
|
37
|
+
const modelInfo = GeneratorRegistry.getGeneratorInfo(modelName);
|
|
38
|
+
if (!modelInfo) {
|
|
39
|
+
throw new Error(`Generator model '${modelName}' is not supported. ` +
|
|
40
|
+
`Supported models: ${GeneratorRegistry.getSupportedGenerators().join(', ')}`);
|
|
41
|
+
}
|
|
42
|
+
this._modelInfo = modelInfo;
|
|
43
|
+
this._options = options;
|
|
44
|
+
}
|
|
45
|
+
// =============================================================================
|
|
46
|
+
// PUBLIC INTERFACE IMPLEMENTATION
|
|
47
|
+
// =============================================================================
|
|
48
|
+
get modelType() {
|
|
49
|
+
return this._modelInfo.type;
|
|
50
|
+
}
|
|
51
|
+
get maxContextLength() {
|
|
52
|
+
return this._modelInfo.capabilities.maxContextLength;
|
|
53
|
+
}
|
|
54
|
+
get maxOutputLength() {
|
|
55
|
+
return this._modelInfo.capabilities.defaultMaxOutputTokens;
|
|
56
|
+
}
|
|
57
|
+
isLoaded() {
|
|
58
|
+
return this._isLoaded;
|
|
59
|
+
}
|
|
60
|
+
getModelInfo() {
|
|
61
|
+
return { ...this._modelInfo }; // Return a copy to prevent mutation
|
|
62
|
+
}
|
|
63
|
+
// =============================================================================
|
|
64
|
+
// DEFAULT IMPLEMENTATION
|
|
65
|
+
// =============================================================================
|
|
66
|
+
/**
|
|
67
|
+
* Generate a response based on query and retrieved chunks
|
|
68
|
+
* This method orchestrates the generation pipeline
|
|
69
|
+
*/
|
|
70
|
+
async generate(request) {
|
|
71
|
+
if (!this._isLoaded) {
|
|
72
|
+
await this.loadModel();
|
|
73
|
+
}
|
|
74
|
+
const startTime = Date.now();
|
|
75
|
+
try {
|
|
76
|
+
// Get generation parameters with defaults
|
|
77
|
+
const maxTokens = request.maxTokens ?? this._modelInfo.capabilities.defaultMaxOutputTokens;
|
|
78
|
+
const temperature = request.temperature ?? this._modelInfo.capabilities.recommendedTemperature;
|
|
79
|
+
const topP = request.topP ?? 0.9;
|
|
80
|
+
const topK = request.topK ?? 50;
|
|
81
|
+
const repetitionPenalty = request.repetitionPenalty ?? 1.1;
|
|
82
|
+
const stopSequences = request.stopSequences ?? getDefaultStopSequences(this.modelType);
|
|
83
|
+
// Get max chunks for context (configurable, with model-specific default)
|
|
84
|
+
const maxChunksForContext = request.maxChunksForContext ??
|
|
85
|
+
this._modelInfo.capabilities.defaultMaxChunksForContext;
|
|
86
|
+
// Limit chunks to maxChunksForContext (assumes chunks are already reranked)
|
|
87
|
+
const totalChunks = request.chunks.length;
|
|
88
|
+
const limitedChunks = request.chunks.slice(0, maxChunksForContext);
|
|
89
|
+
if (totalChunks > maxChunksForContext) {
|
|
90
|
+
console.log(`📊 Using top ${maxChunksForContext} of ${totalChunks} reranked chunks for generation`);
|
|
91
|
+
}
|
|
92
|
+
// Build the prompt with context
|
|
93
|
+
const builtPrompt = buildPrompt({
|
|
94
|
+
query: request.query,
|
|
95
|
+
chunks: limitedChunks,
|
|
96
|
+
modelType: this.modelType,
|
|
97
|
+
systemPrompt: request.systemPrompt,
|
|
98
|
+
maxContextLength: this.maxContextLength,
|
|
99
|
+
reservedOutputTokens: maxTokens,
|
|
100
|
+
includeSourceAttribution: request.includeSourceAttribution
|
|
101
|
+
});
|
|
102
|
+
// Log context info
|
|
103
|
+
if (builtPrompt.contextInfo.truncated) {
|
|
104
|
+
console.warn(`⚠️ Context truncated: Only ${builtPrompt.contextInfo.chunksIncluded} of ` +
|
|
105
|
+
`${builtPrompt.contextInfo.totalChunks} chunks fit in context window`);
|
|
106
|
+
}
|
|
107
|
+
// Generate response
|
|
108
|
+
const result = await this.generateText(builtPrompt.prompt, {
|
|
109
|
+
maxTokens,
|
|
110
|
+
temperature,
|
|
111
|
+
topP,
|
|
112
|
+
topK,
|
|
113
|
+
repetitionPenalty,
|
|
114
|
+
stopSequences
|
|
115
|
+
});
|
|
116
|
+
const generationTimeMs = Date.now() - startTime;
|
|
117
|
+
// Clean up the response text
|
|
118
|
+
const cleanedResponse = this.cleanResponseText(result.text);
|
|
119
|
+
return {
|
|
120
|
+
response: cleanedResponse,
|
|
121
|
+
tokensUsed: result.promptTokens + result.completionTokens,
|
|
122
|
+
truncated: builtPrompt.contextInfo.truncated,
|
|
123
|
+
modelName: this.modelName,
|
|
124
|
+
generationTimeMs,
|
|
125
|
+
metadata: {
|
|
126
|
+
promptTokens: result.promptTokens,
|
|
127
|
+
completionTokens: result.completionTokens,
|
|
128
|
+
chunksIncluded: builtPrompt.contextInfo.chunksIncluded,
|
|
129
|
+
totalChunks: totalChunks, // Report original total, not limited
|
|
130
|
+
finishReason: result.finishReason
|
|
131
|
+
}
|
|
132
|
+
};
|
|
133
|
+
}
|
|
134
|
+
catch (error) {
|
|
135
|
+
const generationTimeMs = Date.now() - startTime;
|
|
136
|
+
if (error instanceof GenerationError) {
|
|
137
|
+
throw error;
|
|
138
|
+
}
|
|
139
|
+
throw new GenerationError(this.modelName, 'generation', `Generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`, error instanceof Error ? error : undefined);
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
// =============================================================================
|
|
143
|
+
// PROTECTED HELPER METHODS
|
|
144
|
+
// =============================================================================
|
|
145
|
+
/**
|
|
146
|
+
* Validate that the model is loaded before operations
|
|
147
|
+
*/
|
|
148
|
+
ensureLoaded() {
|
|
149
|
+
if (!this._isLoaded) {
|
|
150
|
+
throw new GenerationError(this.modelName, 'generation', `Model '${this.modelName}' is not loaded. Call loadModel() first.`);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
/**
|
|
154
|
+
* Clean up response text by removing artifacts
|
|
155
|
+
*/
|
|
156
|
+
cleanResponseText(text) {
|
|
157
|
+
let cleaned = text.trim();
|
|
158
|
+
// Remove common artifacts
|
|
159
|
+
const artifactsToRemove = [
|
|
160
|
+
'<|im_end|>',
|
|
161
|
+
'<|im_start|>',
|
|
162
|
+
'<|endoftext|>',
|
|
163
|
+
'<|assistant|>',
|
|
164
|
+
'<|user|>',
|
|
165
|
+
'<|system|>'
|
|
166
|
+
];
|
|
167
|
+
for (const artifact of artifactsToRemove) {
|
|
168
|
+
cleaned = cleaned.split(artifact)[0];
|
|
169
|
+
}
|
|
170
|
+
// Remove trailing incomplete sentences (if cut off at max tokens)
|
|
171
|
+
if (cleaned.length > 0 && !cleaned.match(/[.!?]$/)) {
|
|
172
|
+
const lastSentenceEnd = Math.max(cleaned.lastIndexOf('.'), cleaned.lastIndexOf('!'), cleaned.lastIndexOf('?'));
|
|
173
|
+
if (lastSentenceEnd > cleaned.length * 0.5) {
|
|
174
|
+
cleaned = cleaned.substring(0, lastSentenceEnd + 1);
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
return cleaned.trim();
|
|
178
|
+
}
|
|
179
|
+
/**
|
|
180
|
+
* Log model loading progress
|
|
181
|
+
*/
|
|
182
|
+
logModelLoading(stage, details) {
|
|
183
|
+
const message = `[${this.modelName}] ${stage}`;
|
|
184
|
+
if (details) {
|
|
185
|
+
console.log(`${message}: ${details}`);
|
|
186
|
+
}
|
|
187
|
+
else {
|
|
188
|
+
console.log(message);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
/**
|
|
192
|
+
* Handle model loading errors with helpful messages
|
|
193
|
+
*/
|
|
194
|
+
handleLoadingError(error) {
|
|
195
|
+
const baseMessage = `Failed to load generator model '${this.modelName}': ${error.message}`;
|
|
196
|
+
// Provide specific guidance based on error type
|
|
197
|
+
if (error.message.includes('network') || error.message.includes('fetch')) {
|
|
198
|
+
return new GenerationError(this.modelName, 'loading', `${baseMessage}\n` +
|
|
199
|
+
`This appears to be a network error. Please check your internet connection ` +
|
|
200
|
+
`and ensure the model repository is accessible.`, error);
|
|
201
|
+
}
|
|
202
|
+
if (error.message.includes('memory') || error.message.includes('OOM')) {
|
|
203
|
+
return new GenerationError(this.modelName, 'loading', `${baseMessage}\n` +
|
|
204
|
+
`This appears to be a memory error. The model requires ` +
|
|
205
|
+
`${this._modelInfo.requirements.minimumMemory}MB. Try closing other applications ` +
|
|
206
|
+
`or using a smaller model like 'Xenova/distilgpt2'.`, error);
|
|
207
|
+
}
|
|
208
|
+
return new GenerationError(this.modelName, 'loading', baseMessage, error);
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
/**
|
|
212
|
+
* Create generator options with defaults
|
|
213
|
+
*/
|
|
214
|
+
export function createGeneratorOptions(options = {}) {
|
|
215
|
+
return {
|
|
216
|
+
timeout: 60000, // 60 seconds
|
|
217
|
+
enableGPU: false,
|
|
218
|
+
logLevel: 'info',
|
|
219
|
+
...options
|
|
220
|
+
};
|
|
221
|
+
}
|
|
222
|
+
//# sourceMappingURL=abstract-generator.js.map
|
|
@@ -190,6 +190,9 @@ export class BinaryIndexFormat {
|
|
|
190
190
|
const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
|
|
191
191
|
let offset = 0;
|
|
192
192
|
// Read basic header (24 bytes, all little-endian)
|
|
193
|
+
if (buffer.byteLength < 24) {
|
|
194
|
+
throw new Error(`Index file too small: expected at least 24 bytes, got ${buffer.byteLength}`);
|
|
195
|
+
}
|
|
193
196
|
const dimensions = view.getUint32(offset, true);
|
|
194
197
|
offset += 4;
|
|
195
198
|
const maxElements = view.getUint32(offset, true);
|
|
@@ -202,10 +205,20 @@ export class BinaryIndexFormat {
|
|
|
202
205
|
offset += 4;
|
|
203
206
|
const currentSize = view.getUint32(offset, true);
|
|
204
207
|
offset += 4;
|
|
205
|
-
//
|
|
206
|
-
const
|
|
207
|
-
|
|
208
|
-
|
|
208
|
+
// Calculate expected size for original format
|
|
209
|
+
const vectorSize = 4 + (dimensions * 4); // id + vector
|
|
210
|
+
const expectedOriginalSize = 24 + (currentSize * vectorSize);
|
|
211
|
+
// Check if this is the extended grouped format (44 bytes header)
|
|
212
|
+
// Extended header has: 24 bytes basic + 4 bytes hasGroups + 16 bytes for offsets/counts = 44 bytes
|
|
213
|
+
// Only check for grouped format if file is larger than expected original format size
|
|
214
|
+
const hasGroups = buffer.byteLength > expectedOriginalSize && buffer.byteLength >= 44 && offset + 4 <= buffer.byteLength
|
|
215
|
+
? view.getUint32(offset, true)
|
|
216
|
+
: 0;
|
|
217
|
+
if (hasGroups === 1 && buffer.byteLength >= 44) {
|
|
218
|
+
// Load grouped format - ensure we have enough bytes for extended header
|
|
219
|
+
if (offset + 20 > buffer.byteLength) {
|
|
220
|
+
throw new Error(`Index file too small for grouped format: expected at least ${offset + 20} bytes, got ${buffer.byteLength}`);
|
|
221
|
+
}
|
|
209
222
|
const textOffset = view.getUint32(offset + 4, true);
|
|
210
223
|
const textCount = view.getUint32(offset + 8, true);
|
|
211
224
|
const imageOffset = view.getUint32(offset + 12, true);
|
|
@@ -218,14 +231,23 @@ export class BinaryIndexFormat {
|
|
|
218
231
|
if (offset % 4 !== 0) {
|
|
219
232
|
throw new Error(`Offset ${offset} is not 4-byte aligned`);
|
|
220
233
|
}
|
|
234
|
+
// Check bounds before reading vector ID
|
|
235
|
+
if (offset + 4 > buffer.byteLength) {
|
|
236
|
+
throw new Error(`Text vector ID at offset ${offset} is outside the bounds of the DataView (buffer size: ${buffer.byteLength})`);
|
|
237
|
+
}
|
|
221
238
|
// Read vector ID
|
|
222
239
|
const id = view.getUint32(offset, true);
|
|
223
240
|
offset += 4;
|
|
241
|
+
// Check bounds before reading vector data
|
|
242
|
+
const vectorDataSize = dimensions * 4;
|
|
243
|
+
if (offset + vectorDataSize > buffer.byteLength) {
|
|
244
|
+
throw new Error(`Text vector data at offset ${offset} would exceed buffer bounds (buffer size: ${buffer.byteLength}, required: ${offset + vectorDataSize})`);
|
|
245
|
+
}
|
|
224
246
|
// Zero-copy Float32Array view
|
|
225
247
|
const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
|
|
226
248
|
// Copy to avoid buffer lifecycle issues
|
|
227
249
|
const vector = new Float32Array(vectorView);
|
|
228
|
-
offset +=
|
|
250
|
+
offset += vectorDataSize;
|
|
229
251
|
textVectors.push({ id, vector });
|
|
230
252
|
}
|
|
231
253
|
// Load image vectors
|
|
@@ -236,14 +258,23 @@ export class BinaryIndexFormat {
|
|
|
236
258
|
if (offset % 4 !== 0) {
|
|
237
259
|
throw new Error(`Offset ${offset} is not 4-byte aligned`);
|
|
238
260
|
}
|
|
261
|
+
// Check bounds before reading vector ID
|
|
262
|
+
if (offset + 4 > buffer.byteLength) {
|
|
263
|
+
throw new Error(`Image vector ID at offset ${offset} is outside the bounds of the DataView (buffer size: ${buffer.byteLength})`);
|
|
264
|
+
}
|
|
239
265
|
// Read vector ID
|
|
240
266
|
const id = view.getUint32(offset, true);
|
|
241
267
|
offset += 4;
|
|
268
|
+
// Check bounds before reading vector data
|
|
269
|
+
const vectorDataSize = dimensions * 4;
|
|
270
|
+
if (offset + vectorDataSize > buffer.byteLength) {
|
|
271
|
+
throw new Error(`Image vector data at offset ${offset} would exceed buffer bounds (buffer size: ${buffer.byteLength}, required: ${offset + vectorDataSize})`);
|
|
272
|
+
}
|
|
242
273
|
// Zero-copy Float32Array view
|
|
243
274
|
const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
|
|
244
275
|
// Copy to avoid buffer lifecycle issues
|
|
245
276
|
const vector = new Float32Array(vectorView);
|
|
246
|
-
offset +=
|
|
277
|
+
offset += vectorDataSize;
|
|
247
278
|
imageVectors.push({ id, vector });
|
|
248
279
|
}
|
|
249
280
|
// Combine all vectors for backward compatibility
|
|
@@ -269,14 +300,23 @@ export class BinaryIndexFormat {
|
|
|
269
300
|
if (offset % 4 !== 0) {
|
|
270
301
|
throw new Error(`Offset ${offset} is not 4-byte aligned`);
|
|
271
302
|
}
|
|
303
|
+
// Check bounds before reading vector ID
|
|
304
|
+
if (offset + 4 > buffer.byteLength) {
|
|
305
|
+
throw new Error(`Offset ${offset} is outside the bounds of the DataView (buffer size: ${buffer.byteLength})`);
|
|
306
|
+
}
|
|
272
307
|
// Read vector ID
|
|
273
308
|
const id = view.getUint32(offset, true);
|
|
274
309
|
offset += 4;
|
|
310
|
+
// Check bounds before reading vector data
|
|
311
|
+
const vectorDataSize = dimensions * 4;
|
|
312
|
+
if (offset + vectorDataSize > buffer.byteLength) {
|
|
313
|
+
throw new Error(`Vector data at offset ${offset} would exceed buffer bounds (buffer size: ${buffer.byteLength}, required: ${offset + vectorDataSize})`);
|
|
314
|
+
}
|
|
275
315
|
// Zero-copy Float32Array view (fast!)
|
|
276
316
|
const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
|
|
277
317
|
// Copy to avoid buffer lifecycle issues
|
|
278
318
|
const vector = new Float32Array(vectorView);
|
|
279
|
-
offset +=
|
|
319
|
+
offset += vectorDataSize;
|
|
280
320
|
vectors.push({ id, vector });
|
|
281
321
|
}
|
|
282
322
|
return {
|