rag-lite-ts 2.2.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -5
- package/dist/cjs/cli/indexer.js +73 -15
- package/dist/cjs/cli/search.js +77 -2
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/cjs/cli.js +53 -7
- package/dist/cjs/core/abstract-generator.d.ts +97 -0
- package/dist/cjs/core/abstract-generator.js +222 -0
- package/dist/cjs/core/binary-index-format.js +53 -10
- package/dist/cjs/core/db.d.ts +56 -0
- package/dist/cjs/core/db.js +105 -0
- package/dist/cjs/core/generator-registry.d.ts +114 -0
- package/dist/cjs/core/generator-registry.js +280 -0
- package/dist/cjs/core/index.d.ts +4 -0
- package/dist/cjs/core/index.js +11 -0
- package/dist/cjs/core/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/cjs/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/cjs/core/lazy-dependency-loader.js +111 -2
- package/dist/cjs/core/prompt-templates.d.ts +138 -0
- package/dist/cjs/core/prompt-templates.js +225 -0
- package/dist/cjs/core/response-generator.d.ts +132 -0
- package/dist/cjs/core/response-generator.js +69 -0
- package/dist/cjs/core/search-pipeline.js +1 -1
- package/dist/cjs/core/search.d.ts +72 -1
- package/dist/cjs/core/search.js +80 -7
- package/dist/cjs/core/types.d.ts +1 -0
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +314 -0
- package/dist/cjs/core/vector-index.d.ts +45 -10
- package/dist/cjs/core/vector-index.js +279 -218
- package/dist/cjs/factories/generator-factory.d.ts +88 -0
- package/dist/cjs/factories/generator-factory.js +151 -0
- package/dist/cjs/factories/index.d.ts +1 -0
- package/dist/cjs/factories/index.js +5 -0
- package/dist/cjs/factories/ingestion-factory.js +3 -7
- package/dist/cjs/factories/search-factory.js +11 -0
- package/dist/cjs/index-manager.d.ts +23 -3
- package/dist/cjs/index-manager.js +84 -15
- package/dist/cjs/index.d.ts +11 -1
- package/dist/cjs/index.js +19 -1
- package/dist/cjs/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/cjs/text/generators/causal-lm-generator.js +197 -0
- package/dist/cjs/text/generators/index.d.ts +10 -0
- package/dist/cjs/text/generators/index.js +10 -0
- package/dist/cjs/text/generators/instruct-generator.d.ts +62 -0
- package/dist/cjs/text/generators/instruct-generator.js +192 -0
- package/dist/esm/cli/indexer.js +73 -15
- package/dist/esm/cli/search.js +77 -2
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.js +53 -7
- package/dist/esm/core/abstract-generator.d.ts +97 -0
- package/dist/esm/core/abstract-generator.js +222 -0
- package/dist/esm/core/binary-index-format.js +53 -10
- package/dist/esm/core/db.d.ts +56 -0
- package/dist/esm/core/db.js +105 -0
- package/dist/esm/core/generator-registry.d.ts +114 -0
- package/dist/esm/core/generator-registry.js +280 -0
- package/dist/esm/core/index.d.ts +4 -0
- package/dist/esm/core/index.js +11 -0
- package/dist/esm/core/ingestion.js +3 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +43 -0
- package/dist/esm/core/lazy-dependency-loader.js +111 -2
- package/dist/esm/core/prompt-templates.d.ts +138 -0
- package/dist/esm/core/prompt-templates.js +225 -0
- package/dist/esm/core/response-generator.d.ts +132 -0
- package/dist/esm/core/response-generator.js +69 -0
- package/dist/esm/core/search-pipeline.js +1 -1
- package/dist/esm/core/search.d.ts +72 -1
- package/dist/esm/core/search.js +80 -7
- package/dist/esm/core/types.d.ts +1 -0
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +314 -0
- package/dist/esm/core/vector-index.d.ts +45 -10
- package/dist/esm/core/vector-index.js +279 -218
- package/dist/esm/factories/generator-factory.d.ts +88 -0
- package/dist/esm/factories/generator-factory.js +151 -0
- package/dist/esm/factories/index.d.ts +1 -0
- package/dist/esm/factories/index.js +5 -0
- package/dist/esm/factories/ingestion-factory.js +3 -7
- package/dist/esm/factories/search-factory.js +11 -0
- package/dist/esm/index-manager.d.ts +23 -3
- package/dist/esm/index-manager.js +84 -15
- package/dist/esm/index.d.ts +11 -1
- package/dist/esm/index.js +19 -1
- package/dist/esm/text/generators/causal-lm-generator.d.ts +65 -0
- package/dist/esm/text/generators/causal-lm-generator.js +197 -0
- package/dist/esm/text/generators/index.d.ts +10 -0
- package/dist/esm/text/generators/index.js +10 -0
- package/dist/esm/text/generators/instruct-generator.d.ts +62 -0
- package/dist/esm/text/generators/instruct-generator.js +192 -0
- package/package.json +14 -7
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TEXT IMPLEMENTATION — Causal LM Generator for DistilGPT2
|
|
3
|
+
*
|
|
4
|
+
* Implements ResponseGenerator interface for causal language models.
|
|
5
|
+
* Supports Xenova/distilgpt2 for fast, basic text generation.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Simple prompt formatting (no chat template)
|
|
9
|
+
* - Fast generation with smaller model
|
|
10
|
+
* - Streaming API (generateStream currently yields the complete response as a single chunk)
|
|
11
|
+
* - Resource management via ResourceManager
|
|
12
|
+
*
|
|
13
|
+
* Note: Causal LM models don't support system prompts, so responses
|
|
14
|
+
* may be less focused than instruction-tuned models.
|
|
15
|
+
*
|
|
16
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
17
|
+
*/
|
|
18
|
+
import '../../dom-polyfills.js';
|
|
19
|
+
import { BaseResponseGenerator } from '../../core/abstract-generator.js';
|
|
20
|
+
import { GenerationError } from '../../core/response-generator.js';
|
|
21
|
+
import { getResourceManager } from '../../core/resource-manager.js';
|
|
22
|
+
import { config } from '../../core/config.js';
|
|
23
|
+
// =============================================================================
|
|
24
|
+
// CAUSAL LM GENERATOR IMPLEMENTATION
|
|
25
|
+
// =============================================================================
|
|
26
|
+
/**
|
|
27
|
+
* Causal LM generator implementation for DistilGPT2
|
|
28
|
+
*
|
|
29
|
+
* Uses causal language models that generate text based on simple prompts.
|
|
30
|
+
* Faster but may produce less focused responses than instruct models.
|
|
31
|
+
*/
|
|
32
|
+
export class CausalLMGenerator extends BaseResponseGenerator {
    /** transformers.js text-generation pipeline; null until loadModel() succeeds. */
    pipeline = null;
    /** Tokenizer used for token counting and for the pad-token id; null until loaded. */
    tokenizer = null;
    /** Shared resource manager the loaded pipeline is registered with. */
    resourceManager = getResourceManager();
    /** Id returned by the resource manager for the registered pipeline (undefined when not registered). */
    resourceId;
    /**
     * @param {string} modelName - Model identifier passed to the base generator.
     * @param {object} [options] - Generator options forwarded to the base class.
     * @throws {Error} If the base class resolves the model to a type other than 'causal-lm'.
     */
    constructor(modelName, options = {}) {
        super(modelName, options);
        // Validate model is a causal-lm model
        if (this.modelType !== 'causal-lm') {
            throw new Error(`CausalLMGenerator requires a causal-lm model, but '${modelName}' is type '${this.modelType}'`);
        }
    }
    // =============================================================================
    // MODEL LIFECYCLE
    // =============================================================================
    /**
     * Load the tokenizer and the text-generation pipeline using transformers.js.
     * A no-op when the model is already loaded. On failure the loaded flag is
     * reset and the error produced by handleLoadingError() is thrown.
     *
     * @returns {Promise<void>}
     */
    async loadModel() {
        if (this._isLoaded && this.pipeline) {
            return;
        }
        try {
            this.logModelLoading('Loading causal LM generator model');
            // Ensure DOM polyfills
            if (typeof globalThis.self === 'undefined') {
                globalThis.self = globalThis;
            }
            // Dynamic import transformers.js
            const { pipeline, AutoTokenizer } = await import('@huggingface/transformers');
            const cacheDir = this._options.cachePath || config.model_cache_path;
            // Load tokenizer first for token counting
            this.logModelLoading('Loading tokenizer');
            this.tokenizer = await AutoTokenizer.from_pretrained(this.modelName, {
                cache_dir: cacheDir
            });
            // Load text generation pipeline
            this.logModelLoading('Loading text generation pipeline');
            this.pipeline = await pipeline('text-generation', this.modelName, {
                cache_dir: cacheDir,
                dtype: 'fp32'
            });
            // Register with resource manager
            this.resourceId = this.resourceManager.registerModel(this.pipeline, this.modelName, 'generator');
            this._isLoaded = true;
            this.logModelLoading('Model loaded successfully');
        }
        catch (error) {
            this._isLoaded = false;
            throw this.handleLoadingError(error);
        }
    }
    /**
     * Release the registered resource and drop the pipeline/tokenizer references.
     * Best-effort: failures are logged as warnings and never thrown.
     *
     * @returns {Promise<void>}
     */
    async cleanup() {
        try {
            if (this.resourceId) {
                await this.resourceManager.cleanupResource(this.resourceId);
                this.resourceId = undefined;
            }
            // Clear references
            this.pipeline = null;
            this.tokenizer = null;
            this._isLoaded = false;
            // Force GC if available. `globalThis` is used instead of the Node-only
            // `global` so this check cannot throw a ReferenceError in other runtimes.
            if (typeof globalThis.gc === 'function') {
                globalThis.gc();
            }
            this.logModelLoading('Resources cleaned up');
        }
        catch (error) {
            console.warn(`Cleanup error: ${error instanceof Error ? error.message : 'Unknown'}`);
            this.pipeline = null;
            this.tokenizer = null;
            this._isLoaded = false;
        }
    }
    // =============================================================================
    // GENERATION IMPLEMENTATION
    // =============================================================================
    /**
     * Generate text using the causal LM model.
     *
     * @param {string} prompt - Prompt text passed to the pipeline as-is.
     * @param {object} options - Generation settings.
     * @param {number} options.maxTokens - Upper bound on newly generated tokens.
     * @param {number} options.temperature - Sampling temperature (clamped to >= 0.1).
     * @param {number} options.topP
     * @param {number} options.topK
     * @param {number} options.repetitionPenalty
     * @param {string[]} options.stopSequences - Output is cut at the earliest occurrence of any of these.
     * @returns {Promise<{text: string, promptTokens: number, completionTokens: number, finishReason: string}>}
     * @throws {GenerationError} Wraps any failure raised while generating.
     */
    async generateText(prompt, options) {
        this.ensureLoaded();
        try {
            // Count prompt tokens
            const promptTokens = await this.countTokens(prompt);
            // Generate - GPT2 uses return_full_text differently
            const result = await this.pipeline(prompt, {
                max_new_tokens: options.maxTokens,
                temperature: Math.max(0.1, options.temperature), // GPT2 needs temp > 0
                top_p: options.topP,
                top_k: options.topK,
                repetition_penalty: options.repetitionPenalty,
                do_sample: true,
                return_full_text: true, // GPT2 needs full text
                pad_token_id: this.tokenizer?.eos_token_id // GPT2 uses eos as pad
            });
            // Extract generated text (remove prompt)
            let generatedText = result[0]?.generated_text || '';
            if (generatedText.startsWith(prompt)) {
                generatedText = generatedText.substring(prompt.length);
            }
            // Process stop sequences: cut at the EARLIEST match across all of them,
            // so the returned text can never contain another stop sequence. Empty
            // stop sequences are skipped because they would match at index 0 and
            // discard the whole output.
            let cutIndex = -1;
            for (const stopSeq of options.stopSequences) {
                if (!stopSeq) {
                    continue;
                }
                const matchIndex = generatedText.indexOf(stopSeq);
                if (matchIndex !== -1 && (cutIndex === -1 || matchIndex < cutIndex)) {
                    cutIndex = matchIndex;
                }
            }
            let finalText = generatedText;
            let finishReason = 'complete';
            if (cutIndex !== -1) {
                finalText = generatedText.substring(0, cutIndex);
                finishReason = 'stop_sequence';
            }
            // Count completion tokens
            const completionTokens = await this.countTokens(finalText);
            // Heuristic: output within 5 tokens of the budget is reported as
            // length-limited (this overrides a stop-sequence match).
            if (completionTokens >= options.maxTokens - 5) {
                finishReason = 'length';
            }
            return {
                text: finalText,
                promptTokens,
                completionTokens,
                finishReason
            };
        }
        catch (error) {
            throw new GenerationError(this.modelName, 'generation', `Text generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`, error instanceof Error ? error : undefined);
        }
    }
    /**
     * Generate text through the streaming interface.
     * Currently not incremental: the full response is produced via generate()
     * and yielded as a single chunk.
     *
     * @param {object} request - Generation request forwarded to generate().
     * @returns {AsyncGenerator<string>}
     */
    async *generateStream(request) {
        // For now, fall back to non-streaming and yield the full response
        // TODO: Implement true streaming when transformers.js supports it better
        const result = await this.generate(request);
        yield result.response;
    }
    // =============================================================================
    // HELPER METHODS
    // =============================================================================
    /**
     * Count tokens in a text string.
     * Falls back to a rough estimate (one token per 4 characters) when no
     * tokenizer is loaded, when tokenization fails, or when the count is 0 or
     * cannot be determined.
     *
     * @param {string} text
     * @returns {Promise<number>}
     */
    async countTokens(text) {
        const estimate = Math.ceil(text.length / 4);
        if (!this.tokenizer) {
            // Fallback to estimation
            return estimate;
        }
        try {
            const encoded = await this.tokenizer(text, {
                // transformers.js names this option `return_tensor` (singular);
                // the Python-style `return_tensors` is not recognized.
                return_tensor: false,
                padding: false,
                truncation: false
            });
            const ids = encoded?.input_ids;
            let count;
            if (Array.isArray(ids)) {
                // Plain ids for a single text, or one row per text if batched.
                count = Array.isArray(ids[0]) ? ids[0].length : ids.length;
            }
            else if (typeof ids?.size === 'number') {
                // Tensor-like result: use its element count.
                count = ids.size;
            }
            return count || estimate;
        }
        catch {
            return estimate;
        }
    }
}
|
|
197
|
+
//# sourceMappingURL=causal-lm-generator.js.map
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TEXT GENERATORS — Export Module
|
|
3
|
+
*
|
|
4
|
+
* Provides text generation capabilities for RAG response synthesis.
|
|
5
|
+
*
|
|
6
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
7
|
+
*/
|
|
8
|
+
export { InstructGenerator } from './instruct-generator.js';
|
|
9
|
+
export { CausalLMGenerator } from './causal-lm-generator.js';
|
|
10
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TEXT GENERATORS — Export Module
|
|
3
|
+
*
|
|
4
|
+
* Provides text generation capabilities for RAG response synthesis.
|
|
5
|
+
*
|
|
6
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
7
|
+
*/
|
|
8
|
+
export { InstructGenerator } from './instruct-generator.js';
|
|
9
|
+
export { CausalLMGenerator } from './causal-lm-generator.js';
|
|
10
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TEXT IMPLEMENTATION — Instruct Generator for SmolLM2-Instruct Models
|
|
3
|
+
*
|
|
4
|
+
* Implements ResponseGenerator interface for instruction-tuned models.
|
|
5
|
+
* Supports SmolLM2-135M-Instruct and SmolLM2-360M-Instruct.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Chat template formatting with system/user/assistant roles
|
|
9
|
+
* - Streaming API (generateStream currently yields the complete response as a single chunk)
|
|
10
|
+
* - Proper stop sequence handling
|
|
11
|
+
* - Resource management via ResourceManager
|
|
12
|
+
*
|
|
13
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
14
|
+
*/
|
|
15
|
+
import '../../dom-polyfills.js';
|
|
16
|
+
import { BaseResponseGenerator, type GeneratorOptions } from '../../core/abstract-generator.js';
|
|
17
|
+
/**
|
|
18
|
+
* Instruct generator implementation for SmolLM2-Instruct models
|
|
19
|
+
*
|
|
20
|
+
* Uses instruction-tuned models that understand chat templates with
|
|
21
|
+
* system, user, and assistant roles for better response quality.
|
|
22
|
+
*/
|
|
23
|
+
export declare class InstructGenerator extends BaseResponseGenerator {
    private pipeline;
    private tokenizer;
    private resourceManager;
    private resourceId?;
    /**
     * @param modelName Model identifier for the generator.
     * @param options Optional generator options.
     */
    constructor(modelName: string, options?: GeneratorOptions);
    /**
     * Loads the instruct model (tokenizer and generation pipeline) via transformers.js.
     */
    loadModel(): Promise<void>;
    /**
     * Releases the model resources held by this generator.
     */
    cleanup(): Promise<void>;
    /**
     * Runs the instruct model on a prompt.
     *
     * @param prompt Prompt text handed to the model.
     * @param options Sampling and stopping settings for this call.
     * @returns The generated text, token counts, and the reason generation ended.
     */
    protected generateText(prompt: string, options: {
        maxTokens: number;
        temperature: number;
        topP: number;
        topK: number;
        repetitionPenalty: number;
        stopSequences: string[];
    }): Promise<{
        text: string;
        promptTokens: number;
        completionTokens: number;
        finishReason: 'complete' | 'length' | 'stop_sequence' | 'error';
    }>;
    /**
     * Produces the response for a request as an async sequence of string chunks.
     */
    generateStream(request: import('../../core/response-generator.js').GenerationRequest): AsyncIterable<string>;
    /**
     * Counts the tokens in a text string.
     */
    private countTokens;
}
|
|
62
|
+
//# sourceMappingURL=instruct-generator.d.ts.map
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TEXT IMPLEMENTATION — Instruct Generator for SmolLM2-Instruct Models
|
|
3
|
+
*
|
|
4
|
+
* Implements ResponseGenerator interface for instruction-tuned models.
|
|
5
|
+
* Supports SmolLM2-135M-Instruct and SmolLM2-360M-Instruct.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Chat template formatting with system/user/assistant roles
|
|
9
|
+
* - Streaming API (generateStream currently yields the complete response as a single chunk)
|
|
10
|
+
* - Proper stop sequence handling
|
|
11
|
+
* - Resource management via ResourceManager
|
|
12
|
+
*
|
|
13
|
+
* @experimental This feature is experimental and may change in future versions.
|
|
14
|
+
*/
|
|
15
|
+
import '../../dom-polyfills.js';
|
|
16
|
+
import { BaseResponseGenerator } from '../../core/abstract-generator.js';
|
|
17
|
+
import { GenerationError } from '../../core/response-generator.js';
|
|
18
|
+
import { getResourceManager } from '../../core/resource-manager.js';
|
|
19
|
+
import { config } from '../../core/config.js';
|
|
20
|
+
// =============================================================================
|
|
21
|
+
// INSTRUCT GENERATOR IMPLEMENTATION
|
|
22
|
+
// =============================================================================
|
|
23
|
+
/**
|
|
24
|
+
* Instruct generator implementation for SmolLM2-Instruct models
|
|
25
|
+
*
|
|
26
|
+
* Uses instruction-tuned models that understand chat templates with
|
|
27
|
+
* system, user, and assistant roles for better response quality.
|
|
28
|
+
*/
|
|
29
|
+
export class InstructGenerator extends BaseResponseGenerator {
    /** transformers.js text-generation pipeline; null until loadModel() succeeds. */
    pipeline = null;
    /** Tokenizer used for token counting and for pad/eos token ids; null until loaded. */
    tokenizer = null;
    /** Shared resource manager the loaded pipeline is registered with. */
    resourceManager = getResourceManager();
    /** Id returned by the resource manager for the registered pipeline (undefined when not registered). */
    resourceId;
    /**
     * @param {string} modelName - Model identifier passed to the base generator.
     * @param {object} [options] - Generator options forwarded to the base class.
     * @throws {Error} If the base class resolves the model to a type other than 'instruct'.
     */
    constructor(modelName, options = {}) {
        super(modelName, options);
        // Validate model is an instruct model
        if (this.modelType !== 'instruct') {
            throw new Error(`InstructGenerator requires an instruct model, but '${modelName}' is type '${this.modelType}'`);
        }
    }
    // =============================================================================
    // MODEL LIFECYCLE
    // =============================================================================
    /**
     * Load the tokenizer and the text-generation pipeline using transformers.js.
     * A no-op when the model is already loaded. On failure the loaded flag is
     * reset and the error produced by handleLoadingError() is thrown.
     *
     * @returns {Promise<void>}
     */
    async loadModel() {
        if (this._isLoaded && this.pipeline) {
            return;
        }
        try {
            this.logModelLoading('Loading instruct generator model');
            // Ensure DOM polyfills
            if (typeof globalThis.self === 'undefined') {
                globalThis.self = globalThis;
            }
            // Dynamic import transformers.js
            const { pipeline, AutoTokenizer } = await import('@huggingface/transformers');
            const cacheDir = this._options.cachePath || config.model_cache_path;
            // Load tokenizer first for token counting
            this.logModelLoading('Loading tokenizer');
            this.tokenizer = await AutoTokenizer.from_pretrained(this.modelName, {
                cache_dir: cacheDir
            });
            // Load text generation pipeline
            this.logModelLoading('Loading text generation pipeline');
            this.pipeline = await pipeline('text-generation', this.modelName, {
                cache_dir: cacheDir,
                dtype: 'fp32'
            });
            // Register with resource manager
            this.resourceId = this.resourceManager.registerModel(this.pipeline, this.modelName, 'generator');
            this._isLoaded = true;
            this.logModelLoading('Model loaded successfully');
        }
        catch (error) {
            this._isLoaded = false;
            throw this.handleLoadingError(error);
        }
    }
    /**
     * Release the registered resource and drop the pipeline/tokenizer references.
     * Best-effort: failures are logged as warnings and never thrown.
     *
     * @returns {Promise<void>}
     */
    async cleanup() {
        try {
            if (this.resourceId) {
                await this.resourceManager.cleanupResource(this.resourceId);
                this.resourceId = undefined;
            }
            // Clear references
            this.pipeline = null;
            this.tokenizer = null;
            this._isLoaded = false;
            // Force GC if available. `globalThis` is used instead of the Node-only
            // `global` so this check cannot throw a ReferenceError in other runtimes.
            if (typeof globalThis.gc === 'function') {
                globalThis.gc();
            }
            this.logModelLoading('Resources cleaned up');
        }
        catch (error) {
            console.warn(`Cleanup error: ${error instanceof Error ? error.message : 'Unknown'}`);
            this.pipeline = null;
            this.tokenizer = null;
            this._isLoaded = false;
        }
    }
    // =============================================================================
    // GENERATION IMPLEMENTATION
    // =============================================================================
    /**
     * Generate text using the instruct model.
     *
     * @param {string} prompt - Prompt text passed to the pipeline as-is.
     * @param {object} options - Generation settings.
     * @param {number} options.maxTokens - Upper bound on newly generated tokens.
     * @param {number} options.temperature - Sampling temperature; sampling is enabled only when > 0.
     * @param {number} options.topP
     * @param {number} options.topK
     * @param {number} options.repetitionPenalty
     * @param {string[]} options.stopSequences - Output is cut at the earliest occurrence of any of these.
     * @returns {Promise<{text: string, promptTokens: number, completionTokens: number, finishReason: string}>}
     * @throws {GenerationError} Wraps any failure raised while generating.
     */
    async generateText(prompt, options) {
        this.ensureLoaded();
        try {
            // Count prompt tokens
            const promptTokens = await this.countTokens(prompt);
            // Generate
            const result = await this.pipeline(prompt, {
                max_new_tokens: options.maxTokens,
                temperature: options.temperature,
                top_p: options.topP,
                top_k: options.topK,
                repetition_penalty: options.repetitionPenalty,
                do_sample: options.temperature > 0,
                return_full_text: false,
                pad_token_id: this.tokenizer?.pad_token_id,
                eos_token_id: this.tokenizer?.eos_token_id
            });
            // Extract generated text
            const generatedText = result[0]?.generated_text || '';
            // Process stop sequences: cut at the EARLIEST match across all of them,
            // so the returned text can never contain another stop sequence. Empty
            // stop sequences are skipped because they would match at index 0 and
            // discard the whole output.
            let cutIndex = -1;
            for (const stopSeq of options.stopSequences) {
                if (!stopSeq) {
                    continue;
                }
                const matchIndex = generatedText.indexOf(stopSeq);
                if (matchIndex !== -1 && (cutIndex === -1 || matchIndex < cutIndex)) {
                    cutIndex = matchIndex;
                }
            }
            let finalText = generatedText;
            let finishReason = 'complete';
            if (cutIndex !== -1) {
                finalText = generatedText.substring(0, cutIndex);
                finishReason = 'stop_sequence';
            }
            // Count completion tokens
            const completionTokens = await this.countTokens(finalText);
            // Heuristic: output within 5 tokens of the budget is reported as
            // length-limited (this overrides a stop-sequence match).
            if (completionTokens >= options.maxTokens - 5) {
                finishReason = 'length';
            }
            return {
                text: finalText,
                promptTokens,
                completionTokens,
                finishReason
            };
        }
        catch (error) {
            throw new GenerationError(this.modelName, 'generation', `Text generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`, error instanceof Error ? error : undefined);
        }
    }
    /**
     * Generate text through the streaming interface.
     * Currently not incremental: the full response is produced via generate()
     * and yielded as a single chunk.
     *
     * @param {object} request - Generation request forwarded to generate().
     * @returns {AsyncGenerator<string>}
     */
    async *generateStream(request) {
        // For now, fall back to non-streaming and yield the full response
        // TODO: Implement true streaming when transformers.js supports it better
        const result = await this.generate(request);
        yield result.response;
    }
    // =============================================================================
    // HELPER METHODS
    // =============================================================================
    /**
     * Count tokens in a text string.
     * Falls back to a rough estimate (one token per 4 characters) when no
     * tokenizer is loaded, when tokenization fails, or when the count is 0 or
     * cannot be determined.
     *
     * @param {string} text
     * @returns {Promise<number>}
     */
    async countTokens(text) {
        const estimate = Math.ceil(text.length / 4);
        if (!this.tokenizer) {
            // Fallback to estimation
            return estimate;
        }
        try {
            const encoded = await this.tokenizer(text, {
                // transformers.js names this option `return_tensor` (singular);
                // the Python-style `return_tensors` is not recognized.
                return_tensor: false,
                padding: false,
                truncation: false
            });
            const ids = encoded?.input_ids;
            let count;
            if (Array.isArray(ids)) {
                // Plain ids for a single text, or one row per text if batched.
                count = Array.isArray(ids[0]) ? ids[0].length : ids.length;
            }
            else if (typeof ids?.size === 'number') {
                // Tensor-like result: use its element count.
                count = ids.size;
            }
            return count || estimate;
        }
        catch {
            return estimate;
        }
    }
}
|
|
192
|
+
//# sourceMappingURL=instruct-generator.js.map
|
package/dist/esm/cli/indexer.js
CHANGED
|
@@ -80,7 +80,7 @@ async function validateModeConfiguration(options) {
|
|
|
80
80
|
*/
|
|
81
81
|
export async function runIngest(path, options = {}) {
|
|
82
82
|
try {
|
|
83
|
-
// Handle --rebuild
|
|
83
|
+
// Handle --force-rebuild flag immediately to prevent dimension mismatch errors
|
|
84
84
|
// Validate path exists
|
|
85
85
|
const resolvedPath = resolve(path);
|
|
86
86
|
if (!existsSync(resolvedPath)) {
|
|
@@ -159,26 +159,52 @@ export async function runIngest(path, options = {}) {
|
|
|
159
159
|
factoryOptions.mode = options.mode;
|
|
160
160
|
console.log(`Using processing mode: ${options.mode}`);
|
|
161
161
|
}
|
|
162
|
-
if (options.
|
|
162
|
+
if (options.forceRebuild) {
|
|
163
163
|
factoryOptions.forceRebuild = true;
|
|
164
|
-
console.log('Force rebuild enabled
|
|
165
|
-
// Delete old index file immediately to prevent dimension mismatch errors
|
|
166
|
-
const indexPath = process.env.RAG_INDEX_FILE || './vector-index.bin';
|
|
167
|
-
const { existsSync, unlinkSync } = await import('fs');
|
|
168
|
-
if (existsSync(indexPath)) {
|
|
169
|
-
try {
|
|
170
|
-
unlinkSync(indexPath);
|
|
171
|
-
console.log('🗑️ Removed old index file to prevent dimension mismatch');
|
|
172
|
-
}
|
|
173
|
-
catch (error) {
|
|
174
|
-
console.warn(`⚠️ Could not remove old index file: ${error}`);
|
|
175
|
-
}
|
|
176
|
-
}
|
|
164
|
+
console.log('Force rebuild enabled (--force-rebuild)');
|
|
177
165
|
}
|
|
178
166
|
// Validate mode-specific model and strategy combinations
|
|
179
167
|
await validateModeConfiguration(factoryOptions);
|
|
180
168
|
const dbPath = process.env.RAG_DB_FILE || './db.sqlite';
|
|
181
169
|
const indexPath = process.env.RAG_INDEX_FILE || './vector-index.bin';
|
|
170
|
+
// --force-rebuild: Always delete DB (and sidecars) and index to guarantee a clean rebuild.
|
|
171
|
+
if (options.forceRebuild) {
|
|
172
|
+
try {
|
|
173
|
+
const { existsSync: fsExistsSync, unlinkSync } = await import('fs');
|
|
174
|
+
console.log('🗑️ Deleting existing database and index to perform a clean rebuild...');
|
|
175
|
+
// Remove WAL/SHM if present (common on SQLite with WAL journaling).
|
|
176
|
+
const sidecars = [`${dbPath}-wal`, `${dbPath}-shm`];
|
|
177
|
+
for (const p of sidecars) {
|
|
178
|
+
if (fsExistsSync(p)) {
|
|
179
|
+
try {
|
|
180
|
+
unlinkSync(p);
|
|
181
|
+
}
|
|
182
|
+
catch (e) {
|
|
183
|
+
console.warn(`⚠️ Could not remove SQLite sidecar file (${p}):`, e);
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
if (fsExistsSync(dbPath)) {
|
|
188
|
+
try {
|
|
189
|
+
unlinkSync(dbPath);
|
|
190
|
+
}
|
|
191
|
+
catch (e) {
|
|
192
|
+
console.warn(`⚠️ Could not remove database file (${dbPath}):`, e);
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
if (fsExistsSync(indexPath)) {
|
|
196
|
+
try {
|
|
197
|
+
unlinkSync(indexPath);
|
|
198
|
+
}
|
|
199
|
+
catch (e) {
|
|
200
|
+
console.warn(`⚠️ Could not remove index file (${indexPath}):`, e);
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
catch (error) {
|
|
205
|
+
console.warn('⚠️ Could not delete existing database/index for clean rebuild:', error instanceof Error ? error.message : String(error));
|
|
206
|
+
}
|
|
207
|
+
}
|
|
182
208
|
// Setup graceful cleanup
|
|
183
209
|
setupCLICleanup(dbPath);
|
|
184
210
|
// Check if database is busy before starting
|
|
@@ -218,6 +244,22 @@ export async function runIngest(path, options = {}) {
|
|
|
218
244
|
console.log(`Processing rate: ${chunksPerSecond} chunks/second`);
|
|
219
245
|
}
|
|
220
246
|
console.log('\nIngestion completed successfully!');
|
|
247
|
+
// Run VACUUM to compact the SQLite database after ingestion
|
|
248
|
+
try {
|
|
249
|
+
const { openDatabase } = await import('../core/db.js');
|
|
250
|
+
const vacuumDb = await openDatabase(dbPath);
|
|
251
|
+
try {
|
|
252
|
+
console.log('Running VACUUM to optimize database size...');
|
|
253
|
+
await vacuumDb.run('VACUUM');
|
|
254
|
+
console.log('VACUUM completed successfully.');
|
|
255
|
+
}
|
|
256
|
+
finally {
|
|
257
|
+
await vacuumDb.close();
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
catch (vacuumError) {
|
|
261
|
+
console.warn('⚠️ VACUUM operation failed or was skipped:', vacuumError instanceof Error ? vacuumError.message : String(vacuumError));
|
|
262
|
+
}
|
|
221
263
|
// Display mode-specific information
|
|
222
264
|
const mode = options.mode || 'text';
|
|
223
265
|
if (mode === 'multimodal') {
|
|
@@ -397,6 +439,22 @@ export async function runRebuild() {
|
|
|
397
439
|
console.log('All embeddings have been regenerated with the current model.');
|
|
398
440
|
console.log('');
|
|
399
441
|
console.log('You can now search your documents using: raglite search "your query"');
|
|
442
|
+
// Run VACUUM to compact the SQLite database after rebuild
|
|
443
|
+
try {
|
|
444
|
+
const { openDatabase } = await import('../core/db.js');
|
|
445
|
+
const vacuumDb = await openDatabase(dbPath);
|
|
446
|
+
try {
|
|
447
|
+
console.log('Running VACUUM to optimize database size after rebuild...');
|
|
448
|
+
await vacuumDb.run('VACUUM');
|
|
449
|
+
console.log('VACUUM completed successfully.');
|
|
450
|
+
}
|
|
451
|
+
finally {
|
|
452
|
+
await vacuumDb.close();
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
catch (vacuumError) {
|
|
456
|
+
console.warn('⚠️ VACUUM operation failed or was skipped:', vacuumError instanceof Error ? vacuumError.message : String(vacuumError));
|
|
457
|
+
}
|
|
400
458
|
}
|
|
401
459
|
finally {
|
|
402
460
|
await db.close();
|