rag-lite-ts 2.3.0 → 2.3.1

Files changed (65)
  1. package/dist/cjs/cli/search.js +77 -2
  2. package/dist/cjs/cli.js +28 -1
  3. package/dist/cjs/core/abstract-generator.d.ts +97 -0
  4. package/dist/cjs/core/abstract-generator.js +222 -0
  5. package/dist/cjs/core/binary-index-format.js +47 -7
  6. package/dist/cjs/core/generator-registry.d.ts +114 -0
  7. package/dist/cjs/core/generator-registry.js +280 -0
  8. package/dist/cjs/core/index.d.ts +4 -0
  9. package/dist/cjs/core/index.js +11 -0
  10. package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
  11. package/dist/cjs/core/lazy-dependency-loader.js +111 -2
  12. package/dist/cjs/core/prompt-templates.d.ts +138 -0
  13. package/dist/cjs/core/prompt-templates.js +225 -0
  14. package/dist/cjs/core/response-generator.d.ts +132 -0
  15. package/dist/cjs/core/response-generator.js +69 -0
  16. package/dist/cjs/core/search.d.ts +72 -1
  17. package/dist/cjs/core/search.js +79 -6
  18. package/dist/cjs/core/types.d.ts +1 -0
  19. package/dist/cjs/core/vector-index-worker.js +10 -0
  20. package/dist/cjs/core/vector-index.js +69 -19
  21. package/dist/cjs/factories/generator-factory.d.ts +88 -0
  22. package/dist/cjs/factories/generator-factory.js +151 -0
  23. package/dist/cjs/factories/index.d.ts +1 -0
  24. package/dist/cjs/factories/index.js +5 -0
  25. package/dist/cjs/index.d.ts +9 -0
  26. package/dist/cjs/index.js +16 -0
  27. package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
  28. package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
  29. package/dist/cjs/text/generators/index.d.ts +10 -0
  30. package/dist/cjs/text/generators/index.js +10 -0
  31. package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
  32. package/dist/cjs/text/generators/instruct-generator.js +192 -0
  33. package/dist/esm/cli/search.js +77 -2
  34. package/dist/esm/cli.js +28 -1
  35. package/dist/esm/core/abstract-generator.d.ts +97 -0
  36. package/dist/esm/core/abstract-generator.js +222 -0
  37. package/dist/esm/core/binary-index-format.js +47 -7
  38. package/dist/esm/core/generator-registry.d.ts +114 -0
  39. package/dist/esm/core/generator-registry.js +280 -0
  40. package/dist/esm/core/index.d.ts +4 -0
  41. package/dist/esm/core/index.js +11 -0
  42. package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
  43. package/dist/esm/core/lazy-dependency-loader.js +111 -2
  44. package/dist/esm/core/prompt-templates.d.ts +138 -0
  45. package/dist/esm/core/prompt-templates.js +225 -0
  46. package/dist/esm/core/response-generator.d.ts +132 -0
  47. package/dist/esm/core/response-generator.js +69 -0
  48. package/dist/esm/core/search.d.ts +72 -1
  49. package/dist/esm/core/search.js +79 -6
  50. package/dist/esm/core/types.d.ts +1 -0
  51. package/dist/esm/core/vector-index-worker.js +10 -0
  52. package/dist/esm/core/vector-index.js +69 -19
  53. package/dist/esm/factories/generator-factory.d.ts +88 -0
  54. package/dist/esm/factories/generator-factory.js +151 -0
  55. package/dist/esm/factories/index.d.ts +1 -0
  56. package/dist/esm/factories/index.js +5 -0
  57. package/dist/esm/index.d.ts +9 -0
  58. package/dist/esm/index.js +16 -0
  59. package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
  60. package/dist/esm/text/generators/causal-lm-generator.js +197 -0
  61. package/dist/esm/text/generators/index.d.ts +10 -0
  62. package/dist/esm/text/generators/index.js +10 -0
  63. package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
  64. package/dist/esm/text/generators/instruct-generator.js +192 -0
  65. package/package.json +1 -1
package/dist/cjs/text/generators/causal-lm-generator.js ADDED
@@ -0,0 +1,197 @@
+ /**
+ * TEXT IMPLEMENTATION — Causal LM Generator for DistilGPT2
+ *
+ * Implements ResponseGenerator interface for causal language models.
+ * Supports Xenova/distilgpt2 for fast, basic text generation.
+ *
+ * Features:
+ * - Simple prompt formatting (no chat template)
+ * - Fast generation with smaller model
+ * - Streaming generation support
+ * - Resource management via ResourceManager
+ *
+ * Note: Causal LM models don't support system prompts, so responses
+ * may be less focused than instruction-tuned models.
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ import '../../dom-polyfills.js';
+ import { BaseResponseGenerator } from '../../core/abstract-generator.js';
+ import { GenerationError } from '../../core/response-generator.js';
+ import { getResourceManager } from '../../core/resource-manager.js';
+ import { config } from '../../core/config.js';
+ // =============================================================================
+ // CAUSAL LM GENERATOR IMPLEMENTATION
+ // =============================================================================
+ /**
+ * Causal LM generator implementation for DistilGPT2
+ *
+ * Uses causal language models that generate text based on simple prompts.
+ * Faster but may produce less focused responses than instruct models.
+ */
+ export class CausalLMGenerator extends BaseResponseGenerator {
+ pipeline = null;
+ tokenizer = null;
+ resourceManager = getResourceManager();
+ resourceId;
+ constructor(modelName, options = {}) {
+ super(modelName, options);
+ // Validate model is a causal-lm model
+ if (this.modelType !== 'causal-lm') {
+ throw new Error(`CausalLMGenerator requires a causal-lm model, but '${modelName}' is type '${this.modelType}'`);
+ }
+ }
+ // =============================================================================
+ // MODEL LIFECYCLE
+ // =============================================================================
+ /**
+ * Load the causal LM model using transformers.js
+ */
+ async loadModel() {
+ if (this._isLoaded && this.pipeline) {
+ return;
+ }
+ try {
+ this.logModelLoading('Loading causal LM generator model');
+ // Ensure DOM polyfills
+ if (typeof globalThis.self === 'undefined') {
+ globalThis.self = globalThis;
+ }
+ // Dynamic import transformers.js
+ const { pipeline, AutoTokenizer } = await import('@huggingface/transformers');
+ // Load tokenizer first for token counting
+ this.logModelLoading('Loading tokenizer');
+ this.tokenizer = await AutoTokenizer.from_pretrained(this.modelName, {
+ cache_dir: this._options.cachePath || config.model_cache_path
+ });
+ // Load text generation pipeline
+ this.logModelLoading('Loading text generation pipeline');
+ this.pipeline = await pipeline('text-generation', this.modelName, {
+ cache_dir: this._options.cachePath || config.model_cache_path,
+ dtype: 'fp32'
+ });
+ // Register with resource manager
+ this.resourceId = this.resourceManager.registerModel(this.pipeline, this.modelName, 'generator');
+ this._isLoaded = true;
+ this.logModelLoading('Model loaded successfully');
+ }
+ catch (error) {
+ this._isLoaded = false;
+ throw this.handleLoadingError(error);
+ }
+ }
+ /**
+ * Clean up model resources
+ */
+ async cleanup() {
+ try {
+ if (this.resourceId) {
+ await this.resourceManager.cleanupResource(this.resourceId);
+ this.resourceId = undefined;
+ }
+ // Clear references
+ this.pipeline = null;
+ this.tokenizer = null;
+ this._isLoaded = false;
+ // Force GC if available
+ if (global.gc) {
+ global.gc();
+ }
+ this.logModelLoading('Resources cleaned up');
+ }
+ catch (error) {
+ console.warn(`Cleanup error: ${error instanceof Error ? error.message : 'Unknown'}`);
+ this.pipeline = null;
+ this.tokenizer = null;
+ this._isLoaded = false;
+ }
+ }
+ // =============================================================================
+ // GENERATION IMPLEMENTATION
+ // =============================================================================
+ /**
+ * Generate text using the causal LM model
+ */
+ async generateText(prompt, options) {
+ this.ensureLoaded();
+ try {
+ // Count prompt tokens
+ const promptTokens = await this.countTokens(prompt);
+ // Generate - GPT2 uses return_full_text differently
+ const result = await this.pipeline(prompt, {
+ max_new_tokens: options.maxTokens,
+ temperature: Math.max(0.1, options.temperature), // GPT2 needs temp > 0
+ top_p: options.topP,
+ top_k: options.topK,
+ repetition_penalty: options.repetitionPenalty,
+ do_sample: true,
+ return_full_text: true, // GPT2 needs full text
+ pad_token_id: this.tokenizer?.eos_token_id // GPT2 uses eos as pad
+ });
+ // Extract generated text (remove prompt)
+ let generatedText = result[0]?.generated_text || '';
+ if (generatedText.startsWith(prompt)) {
+ generatedText = generatedText.substring(prompt.length);
+ }
+ // Process stop sequences
+ let finalText = generatedText;
+ let finishReason = 'complete';
+ for (const stopSeq of options.stopSequences) {
+ const stopIndex = finalText.indexOf(stopSeq);
+ if (stopIndex !== -1) {
+ finalText = finalText.substring(0, stopIndex);
+ finishReason = 'stop_sequence';
+ break;
+ }
+ }
+ // Count completion tokens
+ const completionTokens = await this.countTokens(finalText);
+ // Check if we hit max tokens
+ if (completionTokens >= options.maxTokens - 5) {
+ finishReason = 'length';
+ }
+ return {
+ text: finalText,
+ promptTokens,
+ completionTokens,
+ finishReason
+ };
+ }
+ catch (error) {
+ throw new GenerationError(this.modelName, 'generation', `Text generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`, error instanceof Error ? error : undefined);
+ }
+ }
+ /**
+ * Generate text with streaming output
+ */
+ async *generateStream(request) {
+ // For now, fall back to non-streaming and yield the full response
+ // TODO: Implement true streaming when transformers.js supports it better
+ const result = await this.generate(request);
+ yield result.response;
+ }
+ // =============================================================================
+ // HELPER METHODS
+ // =============================================================================
+ /**
+ * Count tokens in a text string
+ */
+ async countTokens(text) {
+ if (!this.tokenizer) {
+ // Fallback to estimation
+ return Math.ceil(text.length / 4);
+ }
+ try {
+ const encoded = await this.tokenizer(text, {
+ return_tensors: false,
+ padding: false,
+ truncation: false
+ });
+ return encoded.input_ids?.length || Math.ceil(text.length / 4);
+ }
+ catch {
+ return Math.ceil(text.length / 4);
+ }
+ }
+ }
+ //# sourceMappingURL=causal-lm-generator.js.map
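
The new CausalLMGenerator follows a load, generate, cleanup lifecycle. Below is a minimal usage sketch in TypeScript; the root import path and the shape of the generation request are assumptions (this diff does not show GenerationRequest), while loadModel(), generate(), result.response, and cleanup() come from the code above.

import { CausalLMGenerator } from 'rag-lite-ts'; // assumed re-export; see the index.js/index.d.ts additions in this release

async function main(): Promise<void> {
  // 'Xenova/distilgpt2' is the model the file header documents; the
  // constructor throws unless the model's registered type is 'causal-lm'.
  const generator = new CausalLMGenerator('Xenova/distilgpt2');
  await generator.loadModel(); // downloads tokenizer + text-generation pipeline on first use
  try {
    // Hypothetical request shape: the query/context fields are assumptions.
    const result = await generator.generate({
      query: 'What is retrieval-augmented generation?',
      context: ['RAG pairs a retriever with a small generator model.'],
    });
    console.log(result.response); // generateStream() yields this same field
  } finally {
    await generator.cleanup(); // releases the pipeline via ResourceManager
  }
}

main().catch(console.error);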
package/dist/cjs/text/generators/index.d.ts ADDED
@@ -0,0 +1,10 @@
+ /**
+ * TEXT GENERATORS — Export Module
+ *
+ * Provides text generation capabilities for RAG response synthesis.
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ export { InstructGenerator } from './instruct-generator.js';
+ export { CausalLMGenerator } from './causal-lm-generator.js';
+ //# sourceMappingURL=index.d.ts.map
package/dist/cjs/text/generators/index.js ADDED
@@ -0,0 +1,10 @@
+ /**
+ * TEXT GENERATORS — Export Module
+ *
+ * Provides text generation capabilities for RAG response synthesis.
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ export { InstructGenerator } from './instruct-generator.js';
+ export { CausalLMGenerator } from './causal-lm-generator.js';
+ //# sourceMappingURL=index.js.map
package/dist/cjs/text/generators/instruct-generator.d.ts ADDED
@@ -0,0 +1,62 @@
+ /**
+ * TEXT IMPLEMENTATION — Instruct Generator for SmolLM2-Instruct Models
+ *
+ * Implements ResponseGenerator interface for instruction-tuned models.
+ * Supports SmolLM2-135M-Instruct and SmolLM2-360M-Instruct.
+ *
+ * Features:
+ * - Chat template formatting with system/user/assistant roles
+ * - Streaming generation support
+ * - Proper stop sequence handling
+ * - Resource management via ResourceManager
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ import '../../dom-polyfills.js';
+ import { BaseResponseGenerator, type GeneratorOptions } from '../../core/abstract-generator.js';
+ /**
+ * Instruct generator implementation for SmolLM2-Instruct models
+ *
+ * Uses instruction-tuned models that understand chat templates with
+ * system, user, and assistant roles for better response quality.
+ */
+ export declare class InstructGenerator extends BaseResponseGenerator {
+ private pipeline;
+ private tokenizer;
+ private resourceManager;
+ private resourceId?;
+ constructor(modelName: string, options?: GeneratorOptions);
+ /**
+ * Load the instruct model using transformers.js
+ */
+ loadModel(): Promise<void>;
+ /**
+ * Clean up model resources
+ */
+ cleanup(): Promise<void>;
+ /**
+ * Generate text using the instruct model
+ */
+ protected generateText(prompt: string, options: {
+ maxTokens: number;
+ temperature: number;
+ topP: number;
+ topK: number;
+ repetitionPenalty: number;
+ stopSequences: string[];
+ }): Promise<{
+ text: string;
+ promptTokens: number;
+ completionTokens: number;
+ finishReason: 'complete' | 'length' | 'stop_sequence' | 'error';
+ }>;
+ /**
+ * Generate text with streaming output
+ */
+ generateStream(request: import('../../core/response-generator.js').GenerationRequest): AsyncIterable<string>;
+ /**
+ * Count tokens in a text string
+ */
+ private countTokens;
+ }
+ //# sourceMappingURL=instruct-generator.d.ts.map
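
The declaration types generateStream() as AsyncIterable<string>, so callers can consume it with for await. In 2.3.1 the implementation (shown below) yields the entire response as one chunk, but the consumption pattern is the same either way. A sketch, with assumed import paths:

import { InstructGenerator } from 'rag-lite-ts'; // assumed root re-export
import type { GenerationRequest } from 'rag-lite-ts'; // assumed; declared in core/response-generator.d.ts

async function streamAnswer(generator: InstructGenerator, request: GenerationRequest): Promise<void> {
  // Works unchanged if a later release starts yielding token-by-token chunks.
  for await (const chunk of generator.generateStream(request)) {
    process.stdout.write(chunk);
  }
  process.stdout.write('\n');
}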
package/dist/cjs/text/generators/instruct-generator.js ADDED
@@ -0,0 +1,192 @@
+ /**
+ * TEXT IMPLEMENTATION — Instruct Generator for SmolLM2-Instruct Models
+ *
+ * Implements ResponseGenerator interface for instruction-tuned models.
+ * Supports SmolLM2-135M-Instruct and SmolLM2-360M-Instruct.
+ *
+ * Features:
+ * - Chat template formatting with system/user/assistant roles
+ * - Streaming generation support
+ * - Proper stop sequence handling
+ * - Resource management via ResourceManager
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+ import '../../dom-polyfills.js';
+ import { BaseResponseGenerator } from '../../core/abstract-generator.js';
+ import { GenerationError } from '../../core/response-generator.js';
+ import { getResourceManager } from '../../core/resource-manager.js';
+ import { config } from '../../core/config.js';
+ // =============================================================================
+ // INSTRUCT GENERATOR IMPLEMENTATION
+ // =============================================================================
+ /**
+ * Instruct generator implementation for SmolLM2-Instruct models
+ *
+ * Uses instruction-tuned models that understand chat templates with
+ * system, user, and assistant roles for better response quality.
+ */
+ export class InstructGenerator extends BaseResponseGenerator {
+ pipeline = null;
+ tokenizer = null;
+ resourceManager = getResourceManager();
+ resourceId;
+ constructor(modelName, options = {}) {
+ super(modelName, options);
+ // Validate model is an instruct model
+ if (this.modelType !== 'instruct') {
+ throw new Error(`InstructGenerator requires an instruct model, but '${modelName}' is type '${this.modelType}'`);
+ }
+ }
+ // =============================================================================
+ // MODEL LIFECYCLE
+ // =============================================================================
+ /**
+ * Load the instruct model using transformers.js
+ */
+ async loadModel() {
+ if (this._isLoaded && this.pipeline) {
+ return;
+ }
+ try {
+ this.logModelLoading('Loading instruct generator model');
+ // Ensure DOM polyfills
+ if (typeof globalThis.self === 'undefined') {
+ globalThis.self = globalThis;
+ }
+ // Dynamic import transformers.js
+ const { pipeline, AutoTokenizer } = await import('@huggingface/transformers');
+ // Load tokenizer first for token counting
+ this.logModelLoading('Loading tokenizer');
+ this.tokenizer = await AutoTokenizer.from_pretrained(this.modelName, {
+ cache_dir: this._options.cachePath || config.model_cache_path
+ });
+ // Load text generation pipeline
+ this.logModelLoading('Loading text generation pipeline');
+ this.pipeline = await pipeline('text-generation', this.modelName, {
+ cache_dir: this._options.cachePath || config.model_cache_path,
+ dtype: 'fp32'
+ });
+ // Register with resource manager
+ this.resourceId = this.resourceManager.registerModel(this.pipeline, this.modelName, 'generator');
+ this._isLoaded = true;
+ this.logModelLoading('Model loaded successfully');
+ }
+ catch (error) {
+ this._isLoaded = false;
+ throw this.handleLoadingError(error);
+ }
+ }
+ /**
+ * Clean up model resources
+ */
+ async cleanup() {
+ try {
+ if (this.resourceId) {
+ await this.resourceManager.cleanupResource(this.resourceId);
+ this.resourceId = undefined;
+ }
+ // Clear references
+ this.pipeline = null;
+ this.tokenizer = null;
+ this._isLoaded = false;
+ // Force GC if available
+ if (global.gc) {
+ global.gc();
+ }
+ this.logModelLoading('Resources cleaned up');
+ }
+ catch (error) {
+ console.warn(`Cleanup error: ${error instanceof Error ? error.message : 'Unknown'}`);
+ this.pipeline = null;
+ this.tokenizer = null;
+ this._isLoaded = false;
+ }
+ }
+ // =============================================================================
+ // GENERATION IMPLEMENTATION
+ // =============================================================================
+ /**
+ * Generate text using the instruct model
+ */
+ async generateText(prompt, options) {
+ this.ensureLoaded();
+ try {
+ // Count prompt tokens
+ const promptTokens = await this.countTokens(prompt);
+ // Generate
+ const result = await this.pipeline(prompt, {
+ max_new_tokens: options.maxTokens,
+ temperature: options.temperature,
+ top_p: options.topP,
+ top_k: options.topK,
+ repetition_penalty: options.repetitionPenalty,
+ do_sample: options.temperature > 0,
+ return_full_text: false,
+ pad_token_id: this.tokenizer?.pad_token_id,
+ eos_token_id: this.tokenizer?.eos_token_id
+ });
+ // Extract generated text
+ const generatedText = result[0]?.generated_text || '';
+ // Process stop sequences
+ let finalText = generatedText;
+ let finishReason = 'complete';
+ for (const stopSeq of options.stopSequences) {
+ const stopIndex = finalText.indexOf(stopSeq);
+ if (stopIndex !== -1) {
+ finalText = finalText.substring(0, stopIndex);
+ finishReason = 'stop_sequence';
+ break;
+ }
+ }
+ // Count completion tokens
+ const completionTokens = await this.countTokens(finalText);
+ // Check if we hit max tokens
+ if (completionTokens >= options.maxTokens - 5) {
+ finishReason = 'length';
+ }
+ return {
+ text: finalText,
+ promptTokens,
+ completionTokens,
+ finishReason
+ };
+ }
+ catch (error) {
+ throw new GenerationError(this.modelName, 'generation', `Text generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`, error instanceof Error ? error : undefined);
+ }
+ }
+ /**
+ * Generate text with streaming output
+ */
+ async *generateStream(request) {
+ // For now, fall back to non-streaming and yield the full response
+ // TODO: Implement true streaming when transformers.js supports it better
+ const result = await this.generate(request);
+ yield result.response;
+ }
+ // =============================================================================
+ // HELPER METHODS
+ // =============================================================================
+ /**
+ * Count tokens in a text string
+ */
+ async countTokens(text) {
+ if (!this.tokenizer) {
+ // Fallback to estimation
+ return Math.ceil(text.length / 4);
+ }
+ try {
+ const encoded = await this.tokenizer(text, {
+ return_tensors: false,
+ padding: false,
+ truncation: false
+ });
+ return encoded.input_ids?.length || Math.ceil(text.length / 4);
+ }
+ catch {
+ return Math.ceil(text.length / 4);
+ }
+ }
+ }
+ //# sourceMappingURL=instruct-generator.js.map
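
Both generator implementations share the same post-processing: scan the generated text for the first stop sequence, truncate there, and record why generation finished ('length' is set afterwards if the completion ran close to maxTokens). The same logic, restated as a standalone helper; names here are illustrative, and the FinishReason union is copied from the .d.ts above.

type FinishReason = 'complete' | 'length' | 'stop_sequence' | 'error';

function applyStopSequences(
  text: string,
  stopSequences: string[],
): { text: string; finishReason: FinishReason } {
  for (const stopSeq of stopSequences) {
    const stopIndex = text.indexOf(stopSeq);
    if (stopIndex !== -1) {
      // Keep everything before the first stop sequence and stop scanning.
      return { text: text.slice(0, stopIndex), finishReason: 'stop_sequence' };
    }
  }
  return { text, finishReason: 'complete' };
}

// applyStopSequences('Paris.\nUser: next?', ['\nUser:'])
//   returns { text: 'Paris.', finishReason: 'stop_sequence' }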
package/dist/esm/cli/search.js CHANGED
@@ -132,7 +132,7 @@ export async function runSearch(query, options = {}) {
  process.exit(EXIT_CODES.MODEL_ERROR);
  }
  }
- // Prepare search options
+ // Prepare search options (with generation support)
  const searchOptions = {};
  if (options['top-k'] !== undefined) {
  searchOptions.top_k = options['top-k'];
@@ -164,9 +164,60 @@ export async function runSearch(query, options = {}) {
  }
  // Track whether reranking will actually be used in this search
  const rerankingUsed = searchOptions.rerank === true;
+ // Handle generation options (experimental, text mode only)
+ const generateResponse = options.generate === true;
+ const generatorModel = options.generator;
+ const maxGenerationTokens = options['max-tokens'];
+ const generationTemperature = options.temperature;
+ const maxChunksForContext = options['max-chunks'];
+ // Generation only supported in text mode
+ if (generateResponse && isImage) {
+ console.warn('⚠️ [EXPERIMENTAL] Generation is only supported for text searches.');
+ console.warn(' Image search results will be returned without generation.');
+ console.warn('');
+ }
+ // Generation requires reranking - enable it automatically
+ let rerankingEnabledForGeneration = false;
+ if (generateResponse && !isImage && !searchOptions.rerank) {
+ searchOptions.rerank = true;
+ rerankingEnabledForGeneration = true;
+ console.log('📋 Reranking automatically enabled (required for generation)');
+ }
+ // Set up generator if generation is requested (text mode only)
+ let generateFn;
+ if (generateResponse && !isImage) {
+ try {
+ console.log('🤖 [EXPERIMENTAL] Initializing response generator...');
+ const { createGenerateFunctionFromModel, getDefaultGeneratorModel } = await import('../factories/generator-factory.js');
+ const { getDefaultMaxChunksForContext } = await import('../core/generator-registry.js');
+ const modelToUse = generatorModel || getDefaultGeneratorModel();
+ const defaultChunks = getDefaultMaxChunksForContext(modelToUse) || 3;
+ console.log(` Model: ${modelToUse}`);
+ console.log(` Max chunks for context: ${maxChunksForContext || defaultChunks} (default: ${defaultChunks})`);
+ generateFn = await createGenerateFunctionFromModel(modelToUse);
+ searchEngine.setGenerateFunction(generateFn);
+ console.log('✅ Generator initialized');
+ console.log('');
+ }
+ catch (error) {
+ console.error('❌ [EXPERIMENTAL] Failed to initialize generator:', error instanceof Error ? error.message : 'Unknown error');
+ console.error(' Continuing without generation...');
+ console.error('');
+ }
+ }
+ // Set generation options if generator is ready
+ if (generateFn && generateResponse && !isImage) {
+ searchOptions.generateResponse = true;
+ searchOptions.generationOptions = {
+ maxTokens: maxGenerationTokens,
+ temperature: generationTemperature,
+ maxChunksForContext: maxChunksForContext
+ };
+ }
  // Perform search
  const startTime = Date.now();
  let results;
+ let generationResult;
  if (isImage && embedder) {
  // Image-based search: embed the image and search with the vector
  console.log('Embedding image...');
@@ -174,8 +225,14 @@ export async function runSearch(query, options = {}) {
  console.log('Searching with image embedding...');
  results = await searchEngine.searchWithVector(imageEmbedding.vector, searchOptions);
  }
+ else if (generateResponse && generateFn) {
+ // Text-based search with generation
+ const searchResult = await searchEngine.searchWithGeneration(query, searchOptions);
+ results = searchResult.results;
+ generationResult = searchResult.generation;
+ }
  else {
- // Text-based search
+ // Standard text-based search
  results = await searchEngine.search(query, searchOptions);
  }
  const searchTime = Date.now() - startTime;
@@ -216,6 +273,21 @@ export async function runSearch(query, options = {}) {
  }
  console.log('');
  });
+ // Display generated response if available (experimental)
+ if (generationResult) {
+ console.log('─'.repeat(50));
+ console.log('🤖 Generated Response [EXPERIMENTAL]');
+ console.log(`Model: ${generationResult.modelUsed}`);
+ console.log('─'.repeat(50));
+ console.log('');
+ console.log(generationResult.response);
+ console.log('');
+ console.log('─'.repeat(50));
+ console.log(`⏱️ Generation: ${(generationResult.generationTimeMs / 1000).toFixed(1)}s | ` +
+ `📊 ${generationResult.tokensUsed} tokens | ` +
+ `📄 ${generationResult.chunksUsedForContext} chunks used` +
+ (generationResult.truncated ? ' (context truncated)' : ''));
+ }
  // Show search statistics
  const stats = await searchEngine.getStats();
  console.log('─'.repeat(50));
@@ -233,6 +305,9 @@ export async function runSearch(query, options = {}) {
  else {
  console.log('Reranking: disabled');
  }
+ if (generationResult) {
+ console.log('Generation: enabled [EXPERIMENTAL]');
+ }
  }
  }
  finally {
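
The CLI wiring above maps onto a short programmatic flow: build a generate function from a model name, hand it to the engine, then call searchWithGeneration(). A sketch of that flow; the import path and the engine's TypeScript type are assumptions, while the method and option names and the defaults (512 tokens, 0.1 temperature, 3 chunks) are taken from this diff.

import { createGenerateFunctionFromModel } from 'rag-lite-ts'; // assumed re-export of the factory

// Minimal structural type covering only what this sketch needs from the engine.
interface GeneratingSearchEngine {
  setGenerateFunction(fn: unknown): void;
  searchWithGeneration(query: string, options: object): Promise<{
    results: unknown[];
    generation: {
      response: string;
      modelUsed: string;
      generationTimeMs: number;
      tokensUsed: number;
      chunksUsedForContext: number;
      truncated: boolean;
    };
  }>;
}

async function searchAndGenerate(engine: GeneratingSearchEngine, query: string): Promise<void> {
  const generateFn = await createGenerateFunctionFromModel('HuggingFaceTB/SmolLM2-135M-Instruct');
  engine.setGenerateFunction(generateFn);
  const { results, generation } = await engine.searchWithGeneration(query, {
    rerank: true, // generation requires reranking; the CLI enables it automatically
    generateResponse: true,
    generationOptions: { maxTokens: 512, temperature: 0.1, maxChunksForContext: 3 },
  });
  console.log(`${results.length} results retrieved`);
  console.log(`[${generation.modelUsed}] ${generation.response}`);
  console.log(`${(generation.generationTimeMs / 1000).toFixed(1)}s, ${generation.tokensUsed} tokens, ${generation.chunksUsedForContext} chunks used`);
}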
package/dist/esm/cli.js CHANGED
@@ -54,6 +54,7 @@ Examples:
  raglite search "red car" --content-type image # Search only image results
  raglite search ./photo.jpg # Search with image (multimodal mode only)
  raglite search ./image.png --top-k 5 # Find similar images
+ raglite search "How does auth work?" --generate # [EXPERIMENTAL] Generate AI response
  raglite ui # Launch web interface
 
  raglite rebuild # Rebuild the entire index
@@ -64,6 +65,13 @@ Options for search:
  --no-rerank Disable reranking
  --content-type <type> Filter results by content type: 'text', 'image', or 'all' (default: all)
 
+ [EXPERIMENTAL] AI Response Generation (text mode only):
+ --generate Generate an AI response from search results
+ --generator <model> Generator model to use (default: SmolLM2-135M-Instruct)
+ --max-tokens <n> Maximum tokens to generate (default: 512)
+ --temperature <n> Sampling temperature 0-1 (default: 0.1)
+ --max-chunks <n> Maximum chunks for context (default: 3 for 135M, 5 for 360M)
+
  Options for ingest:
  --model <name> Use specific embedding model
  --mode <mode> Processing mode: 'text' (default) or 'multimodal'
@@ -83,6 +91,12 @@ Available reranking strategies (multimodal mode):
  text-derived Use image-to-text conversion + cross-encoder (default)
  disabled No reranking, use vector similarity only
 
+ [EXPERIMENTAL] Available generator models:
+ HuggingFaceTB/SmolLM2-135M-Instruct (balanced, recommended default, uses top 3 chunks)
+ HuggingFaceTB/SmolLM2-360M-Instruct (higher quality, slower, uses top 5 chunks)
+
+ Note: Generation requires reranking (--rerank is automatically enabled with --generate)
+
  For more information, visit: https://github.com/your-repo/rag-lite-ts
  `);
  }
@@ -126,6 +140,10 @@ function parseArgs() {
  else if (optionName === 'force-rebuild') {
  options.forceRebuild = true;
  }
+ else if (optionName === 'generate') {
+ // Handle --generate flag for experimental response generation
+ options.generate = true;
+ }
  else if (optionName === 'help') {
  return { command: 'help', args: [], options: {} };
  }
@@ -136,7 +154,16 @@ function parseArgs() {
  // Handle options with values
  const nextArg = args[i + 1];
  if (nextArg && !nextArg.startsWith('--')) {
- options[optionName] = nextArg;
+ // Parse numeric values for specific options
+ if (optionName === 'max-tokens' || optionName === 'top-k' || optionName === 'max-chunks') {
+ options[optionName] = parseInt(nextArg, 10);
+ }
+ else if (optionName === 'temperature') {
+ options[optionName] = parseFloat(nextArg);
+ }
+ else {
+ options[optionName] = nextArg;
+ }
  i++; // Skip the next argument as it's the value
  }
  else {