rag-lite-ts 2.3.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/cli/search.js +77 -2
- package/dist/cjs/cli.js +28 -1
- package/dist/cjs/core/abstract-generator.d.ts +97 -0
- package/dist/cjs/core/abstract-generator.js +222 -0
- package/dist/cjs/core/binary-index-format.js +47 -7
- package/dist/cjs/core/generator-registry.d.ts +114 -0
- package/dist/cjs/core/generator-registry.js +280 -0
- package/dist/cjs/core/index.d.ts +4 -0
- package/dist/cjs/core/index.js +11 -0
- package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/cjs/core/lazy-dependency-loader.js +111 -2
- package/dist/cjs/core/prompt-templates.d.ts +138 -0
- package/dist/cjs/core/prompt-templates.js +225 -0
- package/dist/cjs/core/response-generator.d.ts +132 -0
- package/dist/cjs/core/response-generator.js +69 -0
- package/dist/cjs/core/search.d.ts +72 -1
- package/dist/cjs/core/search.js +79 -6
- package/dist/cjs/core/types.d.ts +1 -0
- package/dist/cjs/core/vector-index-worker.js +10 -0
- package/dist/cjs/core/vector-index.js +69 -19
- package/dist/cjs/factories/generator-factory.d.ts +88 -0
- package/dist/cjs/factories/generator-factory.js +151 -0
- package/dist/cjs/factories/index.d.ts +1 -0
- package/dist/cjs/factories/index.js +5 -0
- package/dist/cjs/index.d.ts +9 -0
- package/dist/cjs/index.js +16 -0
- package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
- package/dist/cjs/text/generators/index.d.ts +10 -0
- package/dist/cjs/text/generators/index.js +10 -0
- package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
- package/dist/cjs/text/generators/instruct-generator.js +192 -0
- package/dist/esm/cli/search.js +77 -2
- package/dist/esm/cli.js +28 -1
- package/dist/esm/core/abstract-generator.d.ts +97 -0
- package/dist/esm/core/abstract-generator.js +222 -0
- package/dist/esm/core/binary-index-format.js +47 -7
- package/dist/esm/core/generator-registry.d.ts +114 -0
- package/dist/esm/core/generator-registry.js +280 -0
- package/dist/esm/core/index.d.ts +4 -0
- package/dist/esm/core/index.js +11 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/esm/core/lazy-dependency-loader.js +111 -2
- package/dist/esm/core/prompt-templates.d.ts +138 -0
- package/dist/esm/core/prompt-templates.js +225 -0
- package/dist/esm/core/response-generator.d.ts +132 -0
- package/dist/esm/core/response-generator.js +69 -0
- package/dist/esm/core/search.d.ts +72 -1
- package/dist/esm/core/search.js +79 -6
- package/dist/esm/core/types.d.ts +1 -0
- package/dist/esm/core/vector-index-worker.js +10 -0
- package/dist/esm/core/vector-index.js +69 -19
- package/dist/esm/factories/generator-factory.d.ts +88 -0
- package/dist/esm/factories/generator-factory.js +151 -0
- package/dist/esm/factories/index.d.ts +1 -0
- package/dist/esm/factories/index.js +5 -0
- package/dist/esm/index.d.ts +9 -0
- package/dist/esm/index.js +16 -0
- package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/esm/text/generators/causal-lm-generator.js +197 -0
- package/dist/esm/text/generators/index.d.ts +10 -0
- package/dist/esm/text/generators/index.js +10 -0
- package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
- package/dist/esm/text/generators/instruct-generator.js +192 -0
- package/package.json +1 -1
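The bulk of this release is a new, experimental response-generation layer on top of search (generator classes, a generator registry and factory, prompt templates, and CLI wiring). As a quick orientation before the raw diff, here is a minimal TypeScript sketch of how the pieces appear to fit together, based purely on the CLI wiring in `dist/esm/cli/search.js` shown further below; the root import specifier, the `searchEngine` instance, and the exact names re-exported from the package root are assumptions, not confirmed API.

```ts
// Sketch only: flow inferred from dist/esm/cli/search.js in this diff.
// The 'rag-lite-ts' root exports and the searchEngine instance are assumptions.
import {
  createGenerateFunctionFromModel, // from factories/generator-factory.js
  getDefaultGeneratorModel,        // e.g. 'HuggingFaceTB/SmolLM2-135M-Instruct'
} from 'rag-lite-ts';

async function answer(searchEngine: any, query: string) {
  // Build a generate() function for the chosen model and attach it.
  const model = getDefaultGeneratorModel();
  const generateFn = await createGenerateFunctionFromModel(model);
  searchEngine.setGenerateFunction(generateFn);

  // Generation requires reranking; the CLI turns it on automatically.
  const { results, generation } = await searchEngine.searchWithGeneration(query, {
    rerank: true,
    generateResponse: true,
    generationOptions: { maxTokens: 512, temperature: 0.1, maxChunksForContext: 3 },
  });

  console.log(generation?.response); // generated answer; timing and token stats ride along
  return results;                    // the normal reranked search results
}
```

The default values above (512 tokens, temperature 0.1, 3 context chunks) come from the new CLI help text in `dist/esm/cli.js`.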
@@ -0,0 +1,197 @@
+/**
+ * TEXT IMPLEMENTATION — Causal LM Generator for DistilGPT2
+ *
+ * Implements ResponseGenerator interface for causal language models.
+ * Supports Xenova/distilgpt2 for fast, basic text generation.
+ *
+ * Features:
+ * - Simple prompt formatting (no chat template)
+ * - Fast generation with smaller model
+ * - Streaming generation support
+ * - Resource management via ResourceManager
+ *
+ * Note: Causal LM models don't support system prompts, so responses
+ * may be less focused than instruction-tuned models.
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+import '../../dom-polyfills.js';
+import { BaseResponseGenerator } from '../../core/abstract-generator.js';
+import { GenerationError } from '../../core/response-generator.js';
+import { getResourceManager } from '../../core/resource-manager.js';
+import { config } from '../../core/config.js';
+// =============================================================================
+// CAUSAL LM GENERATOR IMPLEMENTATION
+// =============================================================================
+/**
+ * Causal LM generator implementation for DistilGPT2
+ *
+ * Uses causal language models that generate text based on simple prompts.
+ * Faster but may produce less focused responses than instruct models.
+ */
+export class CausalLMGenerator extends BaseResponseGenerator {
+    pipeline = null;
+    tokenizer = null;
+    resourceManager = getResourceManager();
+    resourceId;
+    constructor(modelName, options = {}) {
+        super(modelName, options);
+        // Validate model is a causal-lm model
+        if (this.modelType !== 'causal-lm') {
+            throw new Error(`CausalLMGenerator requires a causal-lm model, but '${modelName}' is type '${this.modelType}'`);
+        }
+    }
+    // =============================================================================
+    // MODEL LIFECYCLE
+    // =============================================================================
+    /**
+     * Load the causal LM model using transformers.js
+     */
+    async loadModel() {
+        if (this._isLoaded && this.pipeline) {
+            return;
+        }
+        try {
+            this.logModelLoading('Loading causal LM generator model');
+            // Ensure DOM polyfills
+            if (typeof globalThis.self === 'undefined') {
+                globalThis.self = globalThis;
+            }
+            // Dynamic import transformers.js
+            const { pipeline, AutoTokenizer } = await import('@huggingface/transformers');
+            // Load tokenizer first for token counting
+            this.logModelLoading('Loading tokenizer');
+            this.tokenizer = await AutoTokenizer.from_pretrained(this.modelName, {
+                cache_dir: this._options.cachePath || config.model_cache_path
+            });
+            // Load text generation pipeline
+            this.logModelLoading('Loading text generation pipeline');
+            this.pipeline = await pipeline('text-generation', this.modelName, {
+                cache_dir: this._options.cachePath || config.model_cache_path,
+                dtype: 'fp32'
+            });
+            // Register with resource manager
+            this.resourceId = this.resourceManager.registerModel(this.pipeline, this.modelName, 'generator');
+            this._isLoaded = true;
+            this.logModelLoading('Model loaded successfully');
+        }
+        catch (error) {
+            this._isLoaded = false;
+            throw this.handleLoadingError(error);
+        }
+    }
+    /**
+     * Clean up model resources
+     */
+    async cleanup() {
+        try {
+            if (this.resourceId) {
+                await this.resourceManager.cleanupResource(this.resourceId);
+                this.resourceId = undefined;
+            }
+            // Clear references
+            this.pipeline = null;
+            this.tokenizer = null;
+            this._isLoaded = false;
+            // Force GC if available
+            if (global.gc) {
+                global.gc();
+            }
+            this.logModelLoading('Resources cleaned up');
+        }
+        catch (error) {
+            console.warn(`Cleanup error: ${error instanceof Error ? error.message : 'Unknown'}`);
+            this.pipeline = null;
+            this.tokenizer = null;
+            this._isLoaded = false;
+        }
+    }
+    // =============================================================================
+    // GENERATION IMPLEMENTATION
+    // =============================================================================
+    /**
+     * Generate text using the causal LM model
+     */
+    async generateText(prompt, options) {
+        this.ensureLoaded();
+        try {
+            // Count prompt tokens
+            const promptTokens = await this.countTokens(prompt);
+            // Generate - GPT2 uses return_full_text differently
+            const result = await this.pipeline(prompt, {
+                max_new_tokens: options.maxTokens,
+                temperature: Math.max(0.1, options.temperature), // GPT2 needs temp > 0
+                top_p: options.topP,
+                top_k: options.topK,
+                repetition_penalty: options.repetitionPenalty,
+                do_sample: true,
+                return_full_text: true, // GPT2 needs full text
+                pad_token_id: this.tokenizer?.eos_token_id // GPT2 uses eos as pad
+            });
+            // Extract generated text (remove prompt)
+            let generatedText = result[0]?.generated_text || '';
+            if (generatedText.startsWith(prompt)) {
+                generatedText = generatedText.substring(prompt.length);
+            }
+            // Process stop sequences
+            let finalText = generatedText;
+            let finishReason = 'complete';
+            for (const stopSeq of options.stopSequences) {
+                const stopIndex = finalText.indexOf(stopSeq);
+                if (stopIndex !== -1) {
+                    finalText = finalText.substring(0, stopIndex);
+                    finishReason = 'stop_sequence';
+                    break;
+                }
+            }
+            // Count completion tokens
+            const completionTokens = await this.countTokens(finalText);
+            // Check if we hit max tokens
+            if (completionTokens >= options.maxTokens - 5) {
+                finishReason = 'length';
+            }
+            return {
+                text: finalText,
+                promptTokens,
+                completionTokens,
+                finishReason
+            };
+        }
+        catch (error) {
+            throw new GenerationError(this.modelName, 'generation', `Text generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`, error instanceof Error ? error : undefined);
+        }
+    }
+    /**
+     * Generate text with streaming output
+     */
+    async *generateStream(request) {
+        // For now, fall back to non-streaming and yield the full response
+        // TODO: Implement true streaming when transformers.js supports it better
+        const result = await this.generate(request);
+        yield result.response;
+    }
+    // =============================================================================
+    // HELPER METHODS
+    // =============================================================================
+    /**
+     * Count tokens in a text string
+     */
+    async countTokens(text) {
+        if (!this.tokenizer) {
+            // Fallback to estimation
+            return Math.ceil(text.length / 4);
+        }
+        try {
+            const encoded = await this.tokenizer(text, {
+                return_tensors: false,
+                padding: false,
+                truncation: false
+            });
+            return encoded.input_ids?.length || Math.ceil(text.length / 4);
+        }
+        catch {
+            return Math.ceil(text.length / 4);
+        }
+    }
+}
+//# sourceMappingURL=causal-lm-generator.js.map
@@ -0,0 +1,10 @@
+/**
+ * TEXT GENERATORS — Export Module
+ *
+ * Provides text generation capabilities for RAG response synthesis.
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+export { InstructGenerator } from './instruct-generator.js';
+export { CausalLMGenerator } from './causal-lm-generator.js';
+//# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,10 @@
+/**
+ * TEXT GENERATORS — Export Module
+ *
+ * Provides text generation capabilities for RAG response synthesis.
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+export { InstructGenerator } from './instruct-generator.js';
+export { CausalLMGenerator } from './causal-lm-generator.js';
+//# sourceMappingURL=index.js.map
@@ -0,0 +1,62 @@
+/**
+ * TEXT IMPLEMENTATION — Instruct Generator for SmolLM2-Instruct Models
+ *
+ * Implements ResponseGenerator interface for instruction-tuned models.
+ * Supports SmolLM2-135M-Instruct and SmolLM2-360M-Instruct.
+ *
+ * Features:
+ * - Chat template formatting with system/user/assistant roles
+ * - Streaming generation support
+ * - Proper stop sequence handling
+ * - Resource management via ResourceManager
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+import '../../dom-polyfills.js';
+import { BaseResponseGenerator, type GeneratorOptions } from '../../core/abstract-generator.js';
+/**
+ * Instruct generator implementation for SmolLM2-Instruct models
+ *
+ * Uses instruction-tuned models that understand chat templates with
+ * system, user, and assistant roles for better response quality.
+ */
+export declare class InstructGenerator extends BaseResponseGenerator {
+    private pipeline;
+    private tokenizer;
+    private resourceManager;
+    private resourceId?;
+    constructor(modelName: string, options?: GeneratorOptions);
+    /**
+     * Load the instruct model using transformers.js
+     */
+    loadModel(): Promise<void>;
+    /**
+     * Clean up model resources
+     */
+    cleanup(): Promise<void>;
+    /**
+     * Generate text using the instruct model
+     */
+    protected generateText(prompt: string, options: {
+        maxTokens: number;
+        temperature: number;
+        topP: number;
+        topK: number;
+        repetitionPenalty: number;
+        stopSequences: string[];
+    }): Promise<{
+        text: string;
+        promptTokens: number;
+        completionTokens: number;
+        finishReason: 'complete' | 'length' | 'stop_sequence' | 'error';
+    }>;
+    /**
+     * Generate text with streaming output
+     */
+    generateStream(request: import('../../core/response-generator.js').GenerationRequest): AsyncIterable<string>;
+    /**
+     * Count tokens in a text string
+     */
+    private countTokens;
+}
+//# sourceMappingURL=instruct-generator.d.ts.map
@@ -0,0 +1,192 @@
+/**
+ * TEXT IMPLEMENTATION — Instruct Generator for SmolLM2-Instruct Models
+ *
+ * Implements ResponseGenerator interface for instruction-tuned models.
+ * Supports SmolLM2-135M-Instruct and SmolLM2-360M-Instruct.
+ *
+ * Features:
+ * - Chat template formatting with system/user/assistant roles
+ * - Streaming generation support
+ * - Proper stop sequence handling
+ * - Resource management via ResourceManager
+ *
+ * @experimental This feature is experimental and may change in future versions.
+ */
+import '../../dom-polyfills.js';
+import { BaseResponseGenerator } from '../../core/abstract-generator.js';
+import { GenerationError } from '../../core/response-generator.js';
+import { getResourceManager } from '../../core/resource-manager.js';
+import { config } from '../../core/config.js';
+// =============================================================================
+// INSTRUCT GENERATOR IMPLEMENTATION
+// =============================================================================
+/**
+ * Instruct generator implementation for SmolLM2-Instruct models
+ *
+ * Uses instruction-tuned models that understand chat templates with
+ * system, user, and assistant roles for better response quality.
+ */
+export class InstructGenerator extends BaseResponseGenerator {
+    pipeline = null;
+    tokenizer = null;
+    resourceManager = getResourceManager();
+    resourceId;
+    constructor(modelName, options = {}) {
+        super(modelName, options);
+        // Validate model is an instruct model
+        if (this.modelType !== 'instruct') {
+            throw new Error(`InstructGenerator requires an instruct model, but '${modelName}' is type '${this.modelType}'`);
+        }
+    }
+    // =============================================================================
+    // MODEL LIFECYCLE
+    // =============================================================================
+    /**
+     * Load the instruct model using transformers.js
+     */
+    async loadModel() {
+        if (this._isLoaded && this.pipeline) {
+            return;
+        }
+        try {
+            this.logModelLoading('Loading instruct generator model');
+            // Ensure DOM polyfills
+            if (typeof globalThis.self === 'undefined') {
+                globalThis.self = globalThis;
+            }
+            // Dynamic import transformers.js
+            const { pipeline, AutoTokenizer } = await import('@huggingface/transformers');
+            // Load tokenizer first for token counting
+            this.logModelLoading('Loading tokenizer');
+            this.tokenizer = await AutoTokenizer.from_pretrained(this.modelName, {
+                cache_dir: this._options.cachePath || config.model_cache_path
+            });
+            // Load text generation pipeline
+            this.logModelLoading('Loading text generation pipeline');
+            this.pipeline = await pipeline('text-generation', this.modelName, {
+                cache_dir: this._options.cachePath || config.model_cache_path,
+                dtype: 'fp32'
+            });
+            // Register with resource manager
+            this.resourceId = this.resourceManager.registerModel(this.pipeline, this.modelName, 'generator');
+            this._isLoaded = true;
+            this.logModelLoading('Model loaded successfully');
+        }
+        catch (error) {
+            this._isLoaded = false;
+            throw this.handleLoadingError(error);
+        }
+    }
+    /**
+     * Clean up model resources
+     */
+    async cleanup() {
+        try {
+            if (this.resourceId) {
+                await this.resourceManager.cleanupResource(this.resourceId);
+                this.resourceId = undefined;
+            }
+            // Clear references
+            this.pipeline = null;
+            this.tokenizer = null;
+            this._isLoaded = false;
+            // Force GC if available
+            if (global.gc) {
+                global.gc();
+            }
+            this.logModelLoading('Resources cleaned up');
+        }
+        catch (error) {
+            console.warn(`Cleanup error: ${error instanceof Error ? error.message : 'Unknown'}`);
+            this.pipeline = null;
+            this.tokenizer = null;
+            this._isLoaded = false;
+        }
+    }
+    // =============================================================================
+    // GENERATION IMPLEMENTATION
+    // =============================================================================
+    /**
+     * Generate text using the instruct model
+     */
+    async generateText(prompt, options) {
+        this.ensureLoaded();
+        try {
+            // Count prompt tokens
+            const promptTokens = await this.countTokens(prompt);
+            // Generate
+            const result = await this.pipeline(prompt, {
+                max_new_tokens: options.maxTokens,
+                temperature: options.temperature,
+                top_p: options.topP,
+                top_k: options.topK,
+                repetition_penalty: options.repetitionPenalty,
+                do_sample: options.temperature > 0,
+                return_full_text: false,
+                pad_token_id: this.tokenizer?.pad_token_id,
+                eos_token_id: this.tokenizer?.eos_token_id
+            });
+            // Extract generated text
+            const generatedText = result[0]?.generated_text || '';
+            // Process stop sequences
+            let finalText = generatedText;
+            let finishReason = 'complete';
+            for (const stopSeq of options.stopSequences) {
+                const stopIndex = finalText.indexOf(stopSeq);
+                if (stopIndex !== -1) {
+                    finalText = finalText.substring(0, stopIndex);
+                    finishReason = 'stop_sequence';
+                    break;
+                }
+            }
+            // Count completion tokens
+            const completionTokens = await this.countTokens(finalText);
+            // Check if we hit max tokens
+            if (completionTokens >= options.maxTokens - 5) {
+                finishReason = 'length';
+            }
+            return {
+                text: finalText,
+                promptTokens,
+                completionTokens,
+                finishReason
+            };
+        }
+        catch (error) {
+            throw new GenerationError(this.modelName, 'generation', `Text generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`, error instanceof Error ? error : undefined);
+        }
+    }
+    /**
+     * Generate text with streaming output
+     */
+    async *generateStream(request) {
+        // For now, fall back to non-streaming and yield the full response
+        // TODO: Implement true streaming when transformers.js supports it better
+        const result = await this.generate(request);
+        yield result.response;
+    }
+    // =============================================================================
+    // HELPER METHODS
+    // =============================================================================
+    /**
+     * Count tokens in a text string
+     */
+    async countTokens(text) {
+        if (!this.tokenizer) {
+            // Fallback to estimation
+            return Math.ceil(text.length / 4);
+        }
+        try {
+            const encoded = await this.tokenizer(text, {
+                return_tensors: false,
+                padding: false,
+                truncation: false
+            });
+            return encoded.input_ids?.length || Math.ceil(text.length / 4);
+        }
+        catch {
+            return Math.ceil(text.length / 4);
+        }
+    }
+}
+//# sourceMappingURL=instruct-generator.js.map
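Both generator classes above share the same lifecycle: loadModel, generate or generateStream, then cleanup. Below is a hedged sketch of driving one of them directly, assuming they are re-exported from the package root and that the request object roughly carries a query and context chunks; the actual GenerationRequest type lives in core/response-generator.d.ts, which is not part of this excerpt.

```ts
// Sketch only: direct generator lifecycle, inferred from the text/generators/* code above.
// The import path and the request shape are assumptions, not confirmed API.
import { InstructGenerator } from 'rag-lite-ts';

async function runOnce() {
  const generator = new InstructGenerator('HuggingFaceTB/SmolLM2-135M-Instruct');
  await generator.loadModel(); // loads tokenizer + text-generation pipeline (fp32) via @huggingface/transformers
  try {
    const request = { query: 'How does auth work?', chunks: [] } as any; // hypothetical GenerationRequest shape
    // generateStream() currently falls back to a single non-streaming chunk.
    for await (const text of generator.generateStream(request)) {
      process.stdout.write(text);
    }
  } finally {
    await generator.cleanup(); // unregisters from the ResourceManager and drops model references
  }
}
```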
package/dist/esm/cli/search.js
CHANGED
@@ -132,7 +132,7 @@ export async function runSearch(query, options = {}) {
             process.exit(EXIT_CODES.MODEL_ERROR);
         }
     }
-    // Prepare search options
+    // Prepare search options (with generation support)
     const searchOptions = {};
     if (options['top-k'] !== undefined) {
         searchOptions.top_k = options['top-k'];
@@ -164,9 +164,60 @@
     }
     // Track whether reranking will actually be used in this search
     const rerankingUsed = searchOptions.rerank === true;
+    // Handle generation options (experimental, text mode only)
+    const generateResponse = options.generate === true;
+    const generatorModel = options.generator;
+    const maxGenerationTokens = options['max-tokens'];
+    const generationTemperature = options.temperature;
+    const maxChunksForContext = options['max-chunks'];
+    // Generation only supported in text mode
+    if (generateResponse && isImage) {
+        console.warn('⚠️ [EXPERIMENTAL] Generation is only supported for text searches.');
+        console.warn(' Image search results will be returned without generation.');
+        console.warn('');
+    }
+    // Generation requires reranking - enable it automatically
+    let rerankingEnabledForGeneration = false;
+    if (generateResponse && !isImage && !searchOptions.rerank) {
+        searchOptions.rerank = true;
+        rerankingEnabledForGeneration = true;
+        console.log('📋 Reranking automatically enabled (required for generation)');
+    }
+    // Set up generator if generation is requested (text mode only)
+    let generateFn;
+    if (generateResponse && !isImage) {
+        try {
+            console.log('🤖 [EXPERIMENTAL] Initializing response generator...');
+            const { createGenerateFunctionFromModel, getDefaultGeneratorModel } = await import('../factories/generator-factory.js');
+            const { getDefaultMaxChunksForContext } = await import('../core/generator-registry.js');
+            const modelToUse = generatorModel || getDefaultGeneratorModel();
+            const defaultChunks = getDefaultMaxChunksForContext(modelToUse) || 3;
+            console.log(` Model: ${modelToUse}`);
+            console.log(` Max chunks for context: ${maxChunksForContext || defaultChunks} (default: ${defaultChunks})`);
+            generateFn = await createGenerateFunctionFromModel(modelToUse);
+            searchEngine.setGenerateFunction(generateFn);
+            console.log('✅ Generator initialized');
+            console.log('');
+        }
+        catch (error) {
+            console.error('❌ [EXPERIMENTAL] Failed to initialize generator:', error instanceof Error ? error.message : 'Unknown error');
+            console.error(' Continuing without generation...');
+            console.error('');
+        }
+    }
+    // Set generation options if generator is ready
+    if (generateFn && generateResponse && !isImage) {
+        searchOptions.generateResponse = true;
+        searchOptions.generationOptions = {
+            maxTokens: maxGenerationTokens,
+            temperature: generationTemperature,
+            maxChunksForContext: maxChunksForContext
+        };
+    }
     // Perform search
     const startTime = Date.now();
     let results;
+    let generationResult;
     if (isImage && embedder) {
         // Image-based search: embed the image and search with the vector
         console.log('Embedding image...');
@@ -174,8 +225,14 @@
         console.log('Searching with image embedding...');
         results = await searchEngine.searchWithVector(imageEmbedding.vector, searchOptions);
     }
+    else if (generateResponse && generateFn) {
+        // Text-based search with generation
+        const searchResult = await searchEngine.searchWithGeneration(query, searchOptions);
+        results = searchResult.results;
+        generationResult = searchResult.generation;
+    }
     else {
-        //
+        // Standard text-based search
        results = await searchEngine.search(query, searchOptions);
     }
     const searchTime = Date.now() - startTime;
@@ -216,6 +273,21 @@
         }
         console.log('');
     });
+    // Display generated response if available (experimental)
+    if (generationResult) {
+        console.log('─'.repeat(50));
+        console.log('🤖 Generated Response [EXPERIMENTAL]');
+        console.log(`Model: ${generationResult.modelUsed}`);
+        console.log('─'.repeat(50));
+        console.log('');
+        console.log(generationResult.response);
+        console.log('');
+        console.log('─'.repeat(50));
+        console.log(`⏱️ Generation: ${(generationResult.generationTimeMs / 1000).toFixed(1)}s | ` +
+            `📊 ${generationResult.tokensUsed} tokens | ` +
+            `📄 ${generationResult.chunksUsedForContext} chunks used` +
+            (generationResult.truncated ? ' (context truncated)' : ''));
+    }
     // Show search statistics
     const stats = await searchEngine.getStats();
     console.log('─'.repeat(50));
@@ -233,6 +305,9 @@
         else {
             console.log('Reranking: disabled');
         }
+        if (generationResult) {
+            console.log('Generation: enabled [EXPERIMENTAL]');
+        }
     }
     }
     finally {
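For reference, the property accesses in the CLI display code above imply roughly the following shape for the generation result; the interface name here is made up for illustration, and the authoritative type lives in core/search.d.ts, which is outside this excerpt.

```ts
// Inferred from the fields the CLI reads; not the package's own declaration.
interface GenerationDisplayInfo {
  response: string;             // the generated answer that gets printed
  modelUsed: string;            // e.g. 'HuggingFaceTB/SmolLM2-135M-Instruct'
  generationTimeMs: number;     // divided by 1000 and shown as seconds
  tokensUsed: number;           // reported token count
  chunksUsedForContext: number; // how many reranked chunks went into the prompt
  truncated: boolean;           // true when the context had to be cut down
}
```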
package/dist/esm/cli.js
CHANGED
@@ -54,6 +54,7 @@ Examples:
   raglite search "red car" --content-type image # Search only image results
   raglite search ./photo.jpg # Search with image (multimodal mode only)
   raglite search ./image.png --top-k 5 # Find similar images
+  raglite search "How does auth work?" --generate # [EXPERIMENTAL] Generate AI response
   raglite ui # Launch web interface
 
   raglite rebuild # Rebuild the entire index
@@ -64,6 +65,13 @@ Options for search:
   --no-rerank Disable reranking
   --content-type <type> Filter results by content type: 'text', 'image', or 'all' (default: all)
 
+[EXPERIMENTAL] AI Response Generation (text mode only):
+  --generate Generate an AI response from search results
+  --generator <model> Generator model to use (default: SmolLM2-135M-Instruct)
+  --max-tokens <n> Maximum tokens to generate (default: 512)
+  --temperature <n> Sampling temperature 0-1 (default: 0.1)
+  --max-chunks <n> Maximum chunks for context (default: 3 for 135M, 5 for 360M)
+
 Options for ingest:
   --model <name> Use specific embedding model
   --mode <mode> Processing mode: 'text' (default) or 'multimodal'
@@ -83,6 +91,12 @@ Available reranking strategies (multimodal mode):
   text-derived Use image-to-text conversion + cross-encoder (default)
   disabled No reranking, use vector similarity only
 
+[EXPERIMENTAL] Available generator models:
+  HuggingFaceTB/SmolLM2-135M-Instruct (balanced, recommended default, uses top 3 chunks)
+  HuggingFaceTB/SmolLM2-360M-Instruct (higher quality, slower, uses top 5 chunks)
+
+Note: Generation requires reranking (--rerank is automatically enabled with --generate)
+
 For more information, visit: https://github.com/your-repo/rag-lite-ts
 `);
 }
@@ -126,6 +140,10 @@ function parseArgs() {
         else if (optionName === 'force-rebuild') {
             options.forceRebuild = true;
         }
+        else if (optionName === 'generate') {
+            // Handle --generate flag for experimental response generation
+            options.generate = true;
+        }
         else if (optionName === 'help') {
             return { command: 'help', args: [], options: {} };
         }
@@ -136,7 +154,16 @@ function parseArgs() {
         // Handle options with values
         const nextArg = args[i + 1];
         if (nextArg && !nextArg.startsWith('--')) {
-            options[optionName] = nextArg;
+            // Parse numeric values for specific options
+            if (optionName === 'max-tokens' || optionName === 'top-k' || optionName === 'max-chunks') {
+                options[optionName] = parseInt(nextArg, 10);
+            }
+            else if (optionName === 'temperature') {
+                options[optionName] = parseFloat(nextArg);
+            }
+            else {
+                options[optionName] = nextArg;
+            }
             i++; // Skip the next argument as it's the value
         }
         else {
|