rag-lite-ts 2.0.1 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -0
- package/dist/cli/indexer.js +21 -2
- package/dist/cli.js +2 -2
- package/dist/core/batch-processing-optimizer.js +6 -11
- package/dist/core/ingestion.js +13 -3
- package/dist/core/model-registry.js +4 -4
- package/dist/core/reranking-strategies.d.ts +1 -16
- package/dist/core/reranking-strategies.js +12 -82
- package/dist/dom-polyfills.js +3 -6
- package/dist/factories/text-factory.js +32 -18
- package/dist/file-processor.js +30 -102
- package/dist/indexer.js +5 -2
- package/dist/ingestion.js +18 -3
- package/dist/mcp-server.js +16 -9
- package/dist/multimodal/clip-embedder.js +11 -11
- package/package.json +1 -1
package/README.md
CHANGED

````diff
@@ -438,6 +438,33 @@ Now Claude can search your docs directly! Works with any MCP-compatible AI tool.
 </tr>
 </table>
 
+### 📁 Supported File Formats
+
+RAG-lite TS supports the following file formats with full processing implementations:
+
+**Text Mode:**
+- Markdown: `.md`, `.mdx`
+- Plain text: `.txt`
+- Documents: `.pdf`, `.docx`
+
+**Multimodal Mode** (includes all text formats plus):
+- Images: `.jpg`, `.jpeg`, `.png`, `.gif`, `.webp`, `.bmp`
+
+All formats work seamlessly with both single file and directory ingestion:
+
+```bash
+# Single file ingestion
+raglite ingest ./document.pdf
+raglite ingest ./readme.md
+raglite ingest ./notes.txt
+
+# Directory ingestion (processes all supported formats)
+raglite ingest ./docs/
+
+# Multimodal ingestion (includes images)
+raglite ingest ./mixed-content/ --mode multimodal
+```
+
 ## 🔧 How It Works
 
 RAG-lite TS follows a clean, efficient pipeline:
````
package/dist/cli/indexer.js
CHANGED

```diff
@@ -148,12 +148,31 @@ export async function runIngest(path, options = {}) {
     const pathType = stats.isDirectory() ? 'directory' : 'file';
     // Validate file type for single files
     if (stats.isFile()) {
-        const
+        const mode = options.mode || 'text';
+        // Only formats with actual processing implementations
+        const textExtensions = ['.md', '.txt', '.mdx', '.pdf', '.docx'];
+        const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'];
+        const validExtensions = mode === 'multimodal'
+            ? [...textExtensions, ...imageExtensions]
+            : textExtensions;
         const hasValidExtension = validExtensions.some(ext => path.toLowerCase().endsWith(ext));
         if (!hasValidExtension) {
             console.error(`Error: Unsupported file type: ${path}`);
             console.error('');
-
+            if (mode === 'multimodal') {
+                console.error('Supported file types in multimodal mode:');
+                console.error('  Text: .md, .txt, .mdx');
+                console.error('  Documents: .pdf, .docx');
+                console.error('  Images: .jpg, .jpeg, .png, .gif, .webp, .bmp');
+            }
+            else {
+                console.error('Supported file types in text mode:');
+                console.error('  Text: .md, .txt, .mdx');
+                console.error('  Documents: .pdf, .docx');
+                console.error('');
+                console.error('For image files, use --mode multimodal:');
+                console.error('  raglite ingest <path> --mode multimodal');
+            }
             console.error('');
             console.error('If you want to ingest multiple files, provide a directory path instead.');
             process.exit(EXIT_CODES.INVALID_ARGUMENTS);
```
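The mode-gated whitelist added above is self-contained enough to read in isolation. As a minimal sketch, here is the same rule as a pure function; the extension arrays mirror the diff exactly, while the function name and usage are illustrative, not part of the package's API:

```ts
// Mode-dependent extension whitelist, mirroring the CLI validation above.
const TEXT_EXTENSIONS = ['.md', '.txt', '.mdx', '.pdf', '.docx'];
const IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'];

// Hypothetical helper name; the CLI inlines this logic instead.
function isSupportedFile(path: string, mode: 'text' | 'multimodal' = 'text'): boolean {
  const validExtensions = mode === 'multimodal'
    ? [...TEXT_EXTENSIONS, ...IMAGE_EXTENSIONS]
    : TEXT_EXTENSIONS;
  // Case-insensitive suffix match, same as the hasValidExtension check
  return validExtensions.some(ext => path.toLowerCase().endsWith(ext));
}

console.log(isSupportedFile('photo.PNG'));               // false: images need multimodal mode
console.log(isSupportedFile('photo.PNG', 'multimodal')); // true
console.log(isSupportedFile('notes.txt'));               // true in either mode
```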
package/dist/cli.js
CHANGED

```diff
@@ -26,7 +26,7 @@ Commands:
   help                    Show this help message
 
 Examples:
-  raglite ingest ./docs/                    # Ingest all .md/.txt files in docs/
+  raglite ingest ./docs/                    # Ingest all .md/.txt/.docx/.pdf files in docs/
   raglite ingest ./readme.md                # Ingest single file
   raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2  # Use higher quality model
   raglite ingest ./docs/ --mode multimodal  # Enable multimodal processing
@@ -126,7 +126,7 @@ function validateArgs(command, args, options) {
         console.error('Usage: raglite ingest <path>');
         console.error('');
         console.error('Examples:');
-        console.error('  raglite ingest ./docs/                    # Ingest all .md/.txt files in docs/');
+        console.error('  raglite ingest ./docs/                    # Ingest all .md/.txt/.docx/.pdf files in docs/');
         console.error('  raglite ingest ./readme.md                # Ingest single file');
         console.error('  raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2  # Use higher quality model');
         console.error('  raglite ingest ./docs/ --mode multimodal  # Enable multimodal processing');
```
package/dist/core/batch-processing-optimizer.js
CHANGED

```diff
@@ -15,8 +15,8 @@ export const DEFAULT_BATCH_CONFIG = {
     textBatchSize: 16,
     imageBatchSize: 4, // Smaller for memory-intensive image processing
     maxConcurrentBatches: 2,
-    // Memory management (
-    memoryThresholdMB:
+    // Memory management (512MB threshold for multimodal processing)
+    memoryThresholdMB: 512,
     enableMemoryMonitoring: true,
     enableGarbageCollection: true,
     // Progress reporting every 5 batches
@@ -402,13 +402,8 @@ export class BatchProcessingOptimizer {
      */
     async preloadImageProcessingModels() {
         try {
-
-
-            const processor = await LazyMultimodalLoader.loadImageToTextProcessor();
-            this.resourcePool.set('imageToText', processor);
-            // Register with resource manager
-            this.resourceManager.registerImageProcessor(processor, 'image-to-text');
-            }
+            // Note: Image-to-text processor is loaded on-demand by file-processor.ts
+            // to avoid conflicts with different pipeline configurations
             if (!this.resourcePool.has('metadataExtractor')) {
                 console.log('Preloading image metadata extractor...');
                 const extractor = await LazyMultimodalLoader.loadImageMetadataExtractor();
@@ -519,7 +514,7 @@ export function createImageBatchProcessor() {
     return new BatchProcessingOptimizer({
         imageBatchSize: 2, // Very small batches for memory efficiency
         textBatchSize: 8,
-        memoryThresholdMB:
+        memoryThresholdMB: 512, // Higher threshold for memory-intensive image processing
         enableMemoryMonitoring: true,
         enableGarbageCollection: true,
         enableParallelProcessing: false, // Sequential for better memory control
@@ -534,7 +529,7 @@ export function createTextBatchProcessor() {
         textBatchSize: 32, // Larger batches for text
         imageBatchSize: 4,
         enableParallelProcessing: true, // Parallel processing for text
-        memoryThresholdMB:
+        memoryThresholdMB: 256, // Lower threshold sufficient for text processing
         progressReportInterval: 10
     });
 }
```
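The concrete values (512 MB for image work, 256 MB for text) replace literals that were previously cut off in this diff. The optimizer's monitoring loop itself is not shown here, so the following is only an assumption-level sketch of how such a threshold is typically enforced in Node.js, using the standard `process.memoryUsage()` API and the optional `--expose-gc` hook that an `enableGarbageCollection` option has to rely on:

```ts
// Hypothetical between-batch threshold check; not the package's actual code.
function heapUsedMB(): number {
  return process.memoryUsage().heapUsed / (1024 * 1024);
}

async function maybeCollect(memoryThresholdMB: number): Promise<void> {
  if (heapUsedMB() <= memoryThresholdMB) return;
  // global.gc exists only when Node is started with --expose-gc
  const gc = (globalThis as { gc?: () => void }).gc;
  if (gc) gc();
  // Yield to the event loop so released buffers can be reclaimed before the next batch
  await new Promise(resolve => setImmediate(resolve));
}

// Hypothetical usage between batches:
// await maybeCollect(512); // image batches
// await maybeCollect(256); // text batches
```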
package/dist/core/ingestion.js
CHANGED

```diff
@@ -290,7 +290,7 @@ export class IngestionPipeline {
             chunkSize: config.chunk_size,
             chunkOverlap: config.chunk_overlap
         };
-        const chunkingResult = await this.chunkDocumentsWithContentTypes(fileResult.documents, effectiveChunkConfig);
+        const chunkingResult = await this.chunkDocumentsWithContentTypes(fileResult.documents, effectiveChunkConfig, options.mode);
         if (chunkingResult.totalChunks === 0) {
             console.log('No chunks created from documents');
             return {
@@ -364,7 +364,7 @@
      * Chunk all documents and organize results with content-type awareness
      * Enhanced to handle different content types appropriately
      */
-    async chunkDocumentsWithContentTypes(documents, chunkConfig) {
+    async chunkDocumentsWithContentTypes(documents, chunkConfig, mode) {
         const documentChunks = [];
         const allChunks = [];
         let totalChunks = 0;
@@ -384,8 +384,18 @@
                 metadata: document.metadata
             }];
         }
+        else if (mode === 'multimodal') {
+            // In multimodal mode, don't chunk text - CLIP handles truncation at 77 tokens
+            // Chunking doesn't make sense because CLIP can't handle long text anyway
+            chunks = [{
+                    text: document.content,
+                    chunkIndex: 0,
+                    contentType: 'text',
+                    metadata: document.metadata
+                }];
+        }
         else {
-            // For text
+            // For text mode, use normal chunking
             const textChunks = await chunkDocument(document, chunkConfig);
             chunks = textChunks.map(chunk => ({
                 ...chunk,
```
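The new `else if (mode === 'multimodal')` branch encodes a simple trade-off: chunking only helps when the embedder can actually attend to the extra tokens, and CLIP truncates everything past 77 tokens, so multimodal ingestion keeps one chunk per document. A minimal sketch of that decision; the stub chunker below stands in for the package's `chunkDocument`, and the 512-character size is arbitrary:

```ts
// Sketch only: chunkDocument is stubbed; the real one lives in the package.
interface Chunk {
  text: string;
  chunkIndex: number;
  contentType: 'text';
}

function chunkDocument(content: string, chunkSize: number): Chunk[] {
  const chunks: Chunk[] = [];
  for (let i = 0; i * chunkSize < content.length; i++) {
    chunks.push({
      text: content.slice(i * chunkSize, (i + 1) * chunkSize),
      chunkIndex: i,
      contentType: 'text',
    });
  }
  return chunks;
}

function chunksFor(content: string, mode: 'text' | 'multimodal'): Chunk[] {
  if (mode === 'multimodal') {
    // One chunk per document: CLIP truncates at 77 tokens regardless,
    // so splitting the text further buys nothing in this mode
    return [{ text: content, chunkIndex: 0, contentType: 'text' }];
  }
  return chunkDocument(content, 512); // normal text-mode chunking
}
```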
package/dist/core/model-registry.js
CHANGED

```diff
@@ -69,7 +69,7 @@ export const SUPPORTED_MODELS = {
         supportsMetadata: true,
         supportsMultimodal: true, // True cross-modal search capabilities
         maxBatchSize: 8,
-        maxTextLength: 77, // CLIP's
+        maxTextLength: 77, // CLIP's token limit (tokenizer handles truncation)
         supportedImageFormats: ['jpg', 'jpeg', 'png', 'webp', 'gif']
     },
     requirements: {
@@ -92,7 +92,7 @@ export const SUPPORTED_MODELS = {
         supportsMetadata: true,
         supportsMultimodal: true, // True cross-modal search capabilities
         maxBatchSize: 4,
-        maxTextLength: 77, // CLIP's
+        maxTextLength: 77, // CLIP's token limit (tokenizer handles truncation)
         supportedImageFormats: ['jpg', 'jpeg', 'png', 'webp', 'gif']
     },
     requirements: {
@@ -194,9 +194,9 @@ export class ModelRegistry {
             suggestions.push('Use smaller batch sizes for optimal performance');
         }
         // Text length limitations
-        if (modelInfo.capabilities.maxTextLength && modelInfo.capabilities.maxTextLength <
+        if (modelInfo.capabilities.maxTextLength && modelInfo.capabilities.maxTextLength < 256) {
             warnings.push(`Model has limited text length: ${modelInfo.capabilities.maxTextLength} characters`);
-            suggestions.push('
+            suggestions.push('Long texts will be truncated by the tokenizer');
         }
         // Image format support
         if (modelInfo.capabilities.supportsImages && modelInfo.capabilities.supportedImageFormats) {
```
package/dist/core/reranking-strategies.d.ts
CHANGED

```diff
@@ -97,20 +97,10 @@ export declare class TextDerivedRerankingStrategy implements RerankingStrategy {
     readonly supportedContentTypes: string[];
     isEnabled: boolean;
     private crossEncoderReranker;
-    private imageToTextModel;
-    private imageToTextModelName;
-    private initialized;
     constructor(imageToTextModelName?: string, crossEncoderModelName?: string);
-    /**
-     * Initialize the image-to-text model if not already done
-     */
-    private ensureInitialized;
-    /**
-     * Ensure DOM polyfills are set up for transformers.js
-     */
-    private ensurePolyfills;
     /**
      * Generate text description for an image
+     * Uses the shared image-to-text functionality from file-processor
      */
     private generateImageDescription;
     /**
@@ -128,11 +118,6 @@ export declare class TextDerivedRerankingStrategy implements RerankingStrategy {
         description: string;
         requiredModels: string[];
         configOptions: {
-            imageToTextModel: {
-                type: string;
-                description: string;
-                default: string;
-            };
             crossEncoderModel: {
                 type: string;
                 description: string;
```
package/dist/core/reranking-strategies.js
CHANGED

```diff
@@ -174,69 +174,22 @@ export class TextDerivedRerankingStrategy {
     supportedContentTypes = ['text', 'image'];
     isEnabled = true;
     crossEncoderReranker;
-    imageToTextModel = null;
-    imageToTextModelName = 'Xenova/vit-gpt2-image-captioning';
-    initialized = false;
     constructor(imageToTextModelName, crossEncoderModelName) {
-
-
-        }
+        // Note: imageToTextModelName parameter is kept for backward compatibility
+        // but is no longer used since we delegate to file-processor's implementation
         // Create the underlying cross-encoder strategy
         this.crossEncoderReranker = new CrossEncoderRerankingStrategy(crossEncoderModelName);
     }
-    /**
-     * Initialize the image-to-text model if not already done
-     */
-    async ensureInitialized() {
-        if (!this.initialized) {
-            try {
-                console.log(`Loading image-to-text model: ${this.imageToTextModelName}`);
-                // Set up polyfills for transformers.js
-                this.ensurePolyfills();
-                const { pipeline } = await import('@huggingface/transformers');
-                this.imageToTextModel = await pipeline('image-to-text', this.imageToTextModelName);
-                this.initialized = true;
-                console.log(`Image-to-text model loaded successfully: ${this.imageToTextModelName}`);
-            }
-            catch (error) {
-                console.warn(`Image-to-text model initialization failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
-                this.isEnabled = false;
-            }
-        }
-    }
-    /**
-     * Ensure DOM polyfills are set up for transformers.js
-     */
-    ensurePolyfills() {
-        if (typeof window === 'undefined' && typeof globalThis !== 'undefined') {
-            if (typeof globalThis.self === 'undefined') {
-                globalThis.self = globalThis;
-            }
-            if (typeof global.self === 'undefined') {
-                global.self = global;
-            }
-        }
-    }
     /**
      * Generate text description for an image
+     * Uses the shared image-to-text functionality from file-processor
      */
     async generateImageDescription(imagePath) {
-        await this.ensureInitialized();
-        if (!this.imageToTextModel) {
-            throw new Error('Image-to-text model not loaded');
-        }
         try {
-
-
-
-
-            }
-            else if (result && typeof result === 'object') {
-                return result.generated_text || result.text || String(result);
-            }
-            else {
-                return String(result);
-            }
+            // Use the file-processor's image description function which has proven to work reliably
+            const { generateImageDescriptionForFile } = await import('../file-processor.js');
+            const result = await generateImageDescriptionForFile(imagePath);
+            return result.description;
         }
         catch (error) {
             console.warn(`Failed to generate description for image ${imagePath}: ${error instanceof Error ? error.message : 'Unknown error'}`);
@@ -249,22 +202,11 @@ export class TextDerivedRerankingStrategy {
      * Rerank search results using text-derived approach
      */
     rerank = async (query, results, contentType) => {
-        // If strategy is disabled, return results unchanged
-        if (!this.isEnabled) {
-            return results;
-        }
         // Validate content type
         if (contentType && !this.supportedContentTypes.includes(contentType)) {
             throw new Error(`Text-derived strategy does not support content type '${contentType}'. ` +
                 `Supported types: ${this.supportedContentTypes.join(', ')}`);
         }
-        // Ensure models are initialized
-        await this.ensureInitialized();
-        // If initialization failed, return results unchanged
-        if (!this.isEnabled) {
-            console.warn('Text-derived reranker not enabled, returning results unchanged');
-            return results;
-        }
         try {
             // Step 1: Convert images to text descriptions
             const processedResults = await Promise.all(results.map(async (result) => {
@@ -314,12 +256,8 @@ export class TextDerivedRerankingStrategy {
      * Configure the reranking strategy
      */
     configure(config) {
-
-
-        // Reset initialization to use new model
-        this.initialized = false;
-        this.imageToTextModel = null;
-        }
+        // Note: imageToTextModel configuration is no longer used
+        // since we delegate to file-processor's implementation
         if (config.crossEncoderModel && typeof config.crossEncoderModel === 'string') {
             this.crossEncoderReranker.configure({ modelName: config.crossEncoderModel });
         }
@@ -334,15 +272,10 @@ export class TextDerivedRerankingStrategy {
         return {
             description: 'Text-derived reranking that converts images to text descriptions then applies cross-encoder reranking',
             requiredModels: [
-                'Xenova/vit-gpt2-image-captioning', // Image-to-text model
+                'Xenova/vit-gpt2-image-captioning', // Image-to-text model (via file-processor)
                 'Xenova/ms-marco-MiniLM-L-6-v2' // Cross-encoder model
             ],
             configOptions: {
-                imageToTextModel: {
-                    type: 'string',
-                    description: 'Image-to-text model name for generating descriptions',
-                    default: 'Xenova/vit-gpt2-image-captioning'
-                },
                 crossEncoderModel: {
                     type: 'string',
                     description: 'Cross-encoder model name for text reranking',
@@ -360,16 +293,15 @@ export class TextDerivedRerankingStrategy {
      * Check if the strategy is ready to use
      */
     async isReady() {
-        await this.ensureInitialized();
         const crossEncoderReady = await this.crossEncoderReranker.isReady();
-        return this.isEnabled &&
+        return this.isEnabled && crossEncoderReady;
     }
     /**
      * Get the current model names being used
      */
     getModelNames() {
         return {
-            imageToText:
+            imageToText: 'Xenova/vit-gpt2-image-captioning', // Fixed model via file-processor
             crossEncoder: this.crossEncoderReranker.getModelName()
         };
     }
@@ -377,8 +309,6 @@ export class TextDerivedRerankingStrategy {
      * Clean up resources
      */
     async cleanup() {
-        this.initialized = false;
-        this.imageToTextModel = null;
         await this.crossEncoderReranker.cleanup();
     }
 }
```
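The net effect of these hunks is that `TextDerivedRerankingStrategy` no longer owns a captioning pipeline, polyfills, or initialization state; it delegates to the single shared implementation in `file-processor.js`. The dynamic `import()` does real work here: it defers loading the heavy module until an image actually needs a caption and avoids a load-time import cycle between the two files. The core of the delegation, isolated below; the import path and function name are exactly those in the diff:

```ts
// Lazy delegation to the shared captioning implementation.
async function describeImage(imagePath: string): Promise<string> {
  // Dynamic import: file-processor.js is evaluated only on first use, so
  // reranking-strategies.js and file-processor.js never form a static cycle.
  const { generateImageDescriptionForFile } = await import('../file-processor.js');
  const result = await generateImageDescriptionForFile(imagePath);
  return result.description;
}
```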
package/dist/dom-polyfills.js
CHANGED

```diff
@@ -30,11 +30,8 @@ if (typeof window === 'undefined') {
     if (typeof globalThis.navigator === 'undefined') {
         globalThis.navigator = dom.window.navigator;
     }
-    //
-
-
-        throw new Error('createImageBitmap not available in Node.js environment');
-    });
-    }
+    // Note: Do NOT polyfill createImageBitmap with a fake implementation
+    // RawImage.fromURL() will handle image loading correctly without it
+    // Setting a fake createImageBitmap that throws errors breaks image loading
 }
 //# sourceMappingURL=dom-polyfills.js.map
```
package/dist/factories/text-factory.js
CHANGED

```diff
@@ -421,18 +421,35 @@ export class TextIngestionFactory {
             console.log(`📁 Creating index directory: ${indexDir}`);
             mkdirSync(indexDir, { recursive: true });
         }
-        // Step 1:
-        const
+        // Step 1: Determine effective mode and select appropriate default model
+        const effectiveMode = options.mode || 'text';
+        // Step 1.5: Select model based on mode if not explicitly provided
+        let effectiveModel;
+        if (options.embeddingModel) {
+            // Use explicitly provided model
+            effectiveModel = options.embeddingModel;
+        }
+        else {
+            // Select default model based on mode
+            if (effectiveMode === 'multimodal') {
+                const { DEFAULT_MODELS } = await import('../core/model-registry.js');
+                effectiveModel = DEFAULT_MODELS['clip'];
+                console.log(`📊 No model specified for multimodal mode, using default: ${effectiveModel}`);
+            }
+            else {
+                effectiveModel = config.embedding_model;
+            }
+        }
+        // Step 2: Get model-specific defaults and merge with options
+        const modelDefaults = getModelDefaults(effectiveModel);
         const effectiveBatchSize = options.batchSize ?? modelDefaults.batch_size;
         const effectiveChunkSize = options.chunkSize ?? modelDefaults.chunk_size;
         const effectiveChunkOverlap = options.chunkOverlap ?? modelDefaults.chunk_overlap;
-        // Step
-        const effectiveMode = options.mode || 'text';
-        const effectiveModel = options.embeddingModel || config.embedding_model;
+        // Step 3: Validate mode-model compatibility at creation time
         console.log('🔍 Validating mode-model compatibility...');
         validateModeModelCompatibilityOrThrow(effectiveMode, effectiveModel);
         console.log('✓ Mode-model compatibility validated');
-        // Step
+        // Step 4: Initialize embedding function based on mode
         let embedFn;
         if (effectiveMode === 'multimodal') {
             console.log('📊 Loading CLIP embedding model for multimodal mode...');
@@ -463,10 +480,10 @@ export class TextIngestionFactory {
         await initializeSchema(db);
         console.log('✓ Database connection established');
         // Step 3.1: Handle mode storage during ingestion
-        await this.handleModeStorage(db, options, modelDefaults);
-        // Step
+        await this.handleModeStorage(db, options, modelDefaults, effectiveModel);
+        // Step 5: Initialize index manager
         console.log('📇 Initializing vector index...');
-        const indexManager = new IndexManager(indexPath, dbPath, modelDefaults.dimensions,
+        const indexManager = new IndexManager(indexPath, dbPath, modelDefaults.dimensions, effectiveModel);
         // Check if we need to force recreation due to model change
         let forceRecreate = false;
         if (options.forceRebuild && existsSync(indexPath) && existsSync(dbPath)) {
@@ -477,9 +494,8 @@ export class TextIngestionFactory {
                 const tempDb = await openDatabase(dbPath);
                 try {
                     const storedModel = await getStoredModelInfo(tempDb);
-
-
-                        console.log(`🔄 Model change detected: ${storedModel.modelName} → ${currentModel}`);
+                    if (storedModel && storedModel.modelName !== effectiveModel) {
+                        console.log(`🔄 Model change detected: ${storedModel.modelName} → ${effectiveModel}`);
                         console.log(`🔄 Dimensions change: ${storedModel.dimensions} → ${modelDefaults.dimensions}`);
                     }
                     else if (storedModel && storedModel.dimensions !== modelDefaults.dimensions) {
@@ -503,9 +519,8 @@ export class TextIngestionFactory {
                 // Update stored model info when rebuilding or creating new index
                 if (options.forceRebuild || forceRecreate) {
                     const { setStoredModelInfo } = await import('../core/db.js');
-
-
-                    console.log(`✓ Updated stored model info: ${currentModel} (${modelDefaults.dimensions} dimensions)`);
+                    await setStoredModelInfo(db, effectiveModel, modelDefaults.dimensions);
+                    console.log(`✓ Updated stored model info: ${effectiveModel} (${modelDefaults.dimensions} dimensions)`);
                 }
             }
             else {
@@ -555,11 +570,10 @@ export class TextIngestionFactory {
      * Creates or validates system info based on the provided mode and options
      * @private
      */
-    static async handleModeStorage(db, options, modelDefaults) {
+    static async handleModeStorage(db, options, modelDefaults, effectiveModel) {
         const { getSystemInfo, setSystemInfo } = await import('../core/db.js');
-        // Determine the effective mode and
+        // Determine the effective mode and reranking strategy
         const effectiveMode = options.mode || 'text';
-        const effectiveModel = options.embeddingModel || config.embedding_model;
         const effectiveRerankingStrategy = options.rerankingStrategy || 'cross-encoder';
         // Determine model type based on model name
         let modelType;
```
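The factory now picks a default embedding model from the mode instead of always falling back to the config. The precedence is: explicit option, then a mode-specific default, then the configured value. A sketch of that rule; the `DEFAULT_MODELS` shape and model names are assumptions for illustration, since the diff only shows the `clip` key being read:

```ts
type Mode = 'text' | 'multimodal';

// Assumed shape; the diff only shows DEFAULT_MODELS['clip'] being accessed
const DEFAULT_MODELS: Record<string, string> = {
  clip: 'Xenova/clip-vit-base-patch32', // hypothetical default CLIP checkpoint
};

function selectModel(mode: Mode, explicitModel?: string, configModel = 'config-model'): string {
  if (explicitModel) {
    return explicitModel; // an explicit --model always wins
  }
  if (mode === 'multimodal') {
    return DEFAULT_MODELS['clip']; // CLIP by default for multimodal
  }
  return configModel; // text mode keeps the configured model
}

console.log(selectModel('multimodal'));                       // CLIP default
console.log(selectModel('text'));                             // config-model
console.log(selectModel('text', 'Xenova/all-mpnet-base-v2')); // explicit override
```

This ordering is what lets `raglite ingest ./docs/ --mode multimodal` work without the user also having to name a CLIP model on the command line.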
package/dist/file-processor.js
CHANGED

```diff
@@ -346,24 +346,35 @@ function extractTitle(content, filePath) {
  * Cache for image-to-text pipeline to avoid reloading
  */
 let imageToTextPipeline = null;
+let imageToTextPipelinePromise = null;
 /**
- * Initialize the image-to-text pipeline
+ * Initialize the image-to-text pipeline with proper async locking
  */
 async function initializeImageToTextPipeline(modelName = 'Xenova/vit-gpt2-image-captioning') {
+    // Return cached pipeline if available
     if (imageToTextPipeline) {
         return imageToTextPipeline;
     }
-
-
-
-        imageToTextPipeline = await pipeline('image-to-text', modelName);
-        console.log(`Successfully loaded image-to-text model: ${modelName}`);
-        return imageToTextPipeline;
-    }
-    catch (error) {
-        console.error(`Failed to load image-to-text model ${modelName}:`, error);
-        throw new Error(`Failed to initialize image-to-text pipeline: ${error instanceof Error ? error.message : String(error)}`);
+    // If pipeline is currently loading, wait for it
+    if (imageToTextPipelinePromise) {
+        return imageToTextPipelinePromise;
     }
+    // Start loading pipeline
+    imageToTextPipelinePromise = (async () => {
+        try {
+            const { pipeline } = await import('@huggingface/transformers');
+            console.log(`Loading image-to-text model: ${modelName}`);
+            imageToTextPipeline = await pipeline('image-to-text', modelName);
+            console.log(`Successfully loaded image-to-text model: ${modelName}`);
+            return imageToTextPipeline;
+        }
+        catch (error) {
+            console.error(`Failed to load image-to-text model ${modelName}:`, error);
+            imageToTextPipelinePromise = null; // Reset on error so it can be retried
+            throw new Error(`Failed to initialize image-to-text pipeline: ${error instanceof Error ? error.message : String(error)}`);
+        }
+    })();
+    return imageToTextPipelinePromise;
 }
 /**
  * Parse PNG image dimensions from file buffer
@@ -545,8 +556,11 @@ async function extractImageMetadata(imagePath) {
 async function generateImageDescription(imagePath, options = DEFAULT_IMAGE_TO_TEXT_OPTIONS) {
     try {
         const pipeline = await initializeImageToTextPipeline(options.model);
-        //
-        const
+        // Load image using RawImage.fromURL which works with local file paths
+        const { RawImage } = await import('@huggingface/transformers');
+        const image = await RawImage.fromURL(imagePath);
+        // Generate description with loaded image
+        const result = await pipeline(image, {
             max_length: options.maxLength || 50,
             num_beams: 4,
             early_stopping: true
@@ -597,93 +611,6 @@ async function generateImageDescriptionsBatch(imagePaths, options = DEFAULT_IMAGE_TO_TEXT_OPTIONS) {
     }
     return results;
 }
-/**
- * Generate text descriptions for multiple images using optimized batch processing
- * Uses BatchProcessingOptimizer for memory-efficient processing of large image collections
- */
-async function generateImageDescriptionsBatchOptimized(imagePaths, options = DEFAULT_IMAGE_TO_TEXT_OPTIONS) {
-    // For small batches, use the existing implementation
-    if (imagePaths.length <= 10) {
-        return generateImageDescriptionsBatch(imagePaths, options);
-    }
-    try {
-        // Import batch processing optimizer
-        const { createImageBatchProcessor } = await import('./core/batch-processing-optimizer.js');
-        const batchProcessor = createImageBatchProcessor();
-        // Convert image paths to batch items
-        const batchItems = imagePaths.map(path => ({
-            content: path,
-            contentType: 'image',
-            metadata: { originalPath: path }
-        }));
-        // Create image description function
-        const imageDescriptionFunction = async (item) => {
-            try {
-                const result = await generateImageDescription(item.content, options);
-                return {
-                    embedding_id: `img_desc_${Date.now()}_${Math.random()}`,
-                    vector: new Float32Array([0]), // Placeholder vector
-                    contentType: 'image',
-                    metadata: {
-                        path: item.content,
-                        description: result.description,
-                        confidence: result.confidence,
-                        model: result.model
-                    }
-                };
-            }
-            catch (error) {
-                throw new Error(`Failed to generate description for ${item.content}: ${error instanceof Error ? error.message : String(error)}`);
-            }
-        };
-        // Process with optimization and progress reporting
-        const batchResult = await batchProcessor.processBatch(batchItems, imageDescriptionFunction, (stats) => {
-            console.log(`Image description progress: ${stats.processedItems}/${stats.totalItems} (${Math.round((stats.processedItems / stats.totalItems) * 100)}%)`);
-            console.log(`  Memory usage: ${stats.memoryUsageMB}MB (peak: ${stats.peakMemoryUsageMB}MB)`);
-            if (stats.failedItems > 0) {
-                console.log(`  Failed items: ${stats.failedItems}`);
-            }
-        });
-        // Log final statistics
-        console.log(`✓ Image description generation complete:`);
-        console.log(`  Processed: ${batchResult.stats.processedItems}/${batchResult.stats.totalItems}`);
-        console.log(`  Failed: ${batchResult.stats.failedItems}`);
-        console.log(`  Processing time: ${Math.round(batchResult.stats.processingTimeMs / 1000)}s`);
-        console.log(`  Rate: ${Math.round(batchResult.stats.itemsPerSecond)} images/sec`);
-        console.log(`  Peak memory usage: ${batchResult.stats.peakMemoryUsageMB}MB`);
-        if (batchResult.stats.retryCount > 0) {
-            console.log(`  Retries: ${batchResult.stats.retryCount}`);
-        }
-        // Convert results back to expected format
-        const results = [];
-        // Add successful results
-        for (const result of batchResult.results) {
-            if (result.metadata?.description) {
-                results.push({
-                    path: result.metadata.path,
-                    result: {
-                        description: result.metadata.description,
-                        confidence: result.metadata.confidence,
-                        model: result.metadata.model
-                    }
-                });
-            }
-        }
-        // Add failed results
-        for (const error of batchResult.errors) {
-            results.push({
-                path: error.item.content,
-                error: error.error
-            });
-        }
-        return results;
-    }
-    catch (error) {
-        console.warn(`Optimized batch processing failed, falling back to standard batch processing: ${error instanceof Error ? error.message : String(error)}`);
-        // Fall back to existing implementation
-        return generateImageDescriptionsBatch(imagePaths, options);
-    }
-}
 /**
  * Process image file to extract text description and metadata
  */
@@ -834,8 +761,8 @@ export async function processFiles(filePaths, pathManager, imageToTextOptions) {
     if (imageFiles.length > 0) {
         console.log(`Processing ${imageFiles.length} image files with optimized batch processing`);
         try {
-            // Use
-            const batchResults = await
+            // Use batch processing for image descriptions
+            const batchResults = await generateImageDescriptionsBatch(imageFiles, imageToTextOptions);
             // Convert batch results to documents with metadata extraction
             for (const batchResult of batchResults) {
                 try {
@@ -961,6 +888,7 @@ export async function cleanupImageProcessingResources() {
         await imageToTextPipeline.dispose();
     }
     imageToTextPipeline = null;
+    imageToTextPipelinePromise = null;
     console.log('Image-to-text pipeline cleaned up');
 }
 catch (error) {
```
package/dist/indexer.js
CHANGED

```diff
@@ -16,11 +16,14 @@ async function main() {
     console.error('  <path>    File or directory path to ingest (.md and .txt files)');
     console.error('');
     console.error('Examples:');
-    console.error('  node indexer.js ./docs/          # Ingest all .md/.txt files in docs/');
+    console.error('  node indexer.js ./docs/          # Ingest all .md/.txt/.pdf/.docs files in docs/');
     console.error('  node indexer.js ./readme.md      # Ingest single file');
     console.error('  node indexer.js ../project/docs/ # Ingest from parent directory');
     console.error('');
-    console.error('Supported file types:
+    console.error('Supported file types:');
+    console.error('  Text: .md, .txt, .mdx');
+    console.error('  Documents: .pdf, .docx');
+    console.error('  Images (multimodal mode): .jpg, .jpeg, .png, .gif, .webp, .bmp');
     console.error('');
     console.error('After ingestion, use: node search.js "your query"');
     process.exit(EXIT_CODES.INVALID_ARGUMENTS);
```
package/dist/ingestion.js
CHANGED

```diff
@@ -64,7 +64,12 @@ export class IngestionPipeline {
         if (!this.corePipeline) {
             throw new Error('IngestionPipeline failed to initialize');
         }
-
+        // Merge mode from constructor options with runtime options
+        const mergedOptions = {
+            ...options,
+            mode: options?.mode || this.options.mode
+        };
+        return this.corePipeline.ingestFile(filePath, mergedOptions);
     }
     /**
      * Ingest all documents in a directory
@@ -74,7 +79,12 @@
         if (!this.corePipeline) {
             throw new Error('IngestionPipeline failed to initialize');
         }
-
+        // Merge mode from constructor options with runtime options
+        const mergedOptions = {
+            ...options,
+            mode: options?.mode || this.options.mode
+        };
+        return this.corePipeline.ingestDirectory(directoryPath, mergedOptions);
     }
     /**
      * Ingest content from memory buffer
@@ -95,7 +105,12 @@
         if (!this.corePipeline) {
             throw new Error('IngestionPipeline failed to initialize');
         }
-
+        // Merge mode from constructor options with runtime options
+        const mergedOptions = {
+            ...options,
+            mode: options?.mode || this.options.mode
+        };
+        return this.corePipeline.ingestFromMemory(content, metadata, mergedOptions);
     }
     /**
      * Clean up resources
```
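All three wrappers apply the same precedence: a `mode` passed at call time overrides the one fixed in the constructor. The rule in isolation, with illustrative names:

```ts
type Mode = 'text' | 'multimodal';
interface IngestOptions { mode?: Mode }

class PipelineWrapper {
  constructor(private readonly options: IngestOptions = {}) {}

  // Same merge as the diff: per-call mode wins, constructor mode is the fallback
  mergeOptions(options?: IngestOptions): IngestOptions {
    return { ...options, mode: options?.mode || this.options.mode };
  }
}

const pipeline = new PipelineWrapper({ mode: 'multimodal' });
console.log(pipeline.mergeOptions());                 // { mode: 'multimodal' }
console.log(pipeline.mergeOptions({ mode: 'text' })); // { mode: 'text' }
```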
package/dist/mcp-server.js
CHANGED

```diff
@@ -501,16 +501,23 @@ class RagLiteMCPServer {
         catch (error) {
             throw new Error(`Cannot access path: ${args.path}. Check permissions.`);
         }
-        // Validate
+        // Validate mode parameter
+        const mode = args.mode || 'text';
+        // Validate file type for single files (only formats with actual processing implementations)
         if (stats.isFile()) {
-            const
+            const textExtensions = ['.md', '.txt', '.mdx', '.pdf', '.docx'];
+            const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'];
+            const validExtensions = mode === 'multimodal'
+                ? [...textExtensions, ...imageExtensions]
+                : textExtensions;
             const hasValidExtension = validExtensions.some(ext => args.path.toLowerCase().endsWith(ext));
             if (!hasValidExtension) {
-
+                const supportedTypes = mode === 'multimodal'
+                    ? '.md, .txt, .mdx, .pdf, .docx, .jpg, .jpeg, .png, .gif, .webp, .bmp'
+                    : '.md, .txt, .mdx, .pdf, .docx';
+                throw new Error(`Unsupported file type: ${args.path}. Supported types: ${supportedTypes}`);
             }
         }
-        // Validate mode parameter
-        const mode = args.mode || 'text';
         if (!['text', 'multimodal'].includes(mode)) {
             throw new Error(`Invalid mode: ${mode}. Supported modes: text, multimodal`);
         }
@@ -585,8 +592,8 @@ class RagLiteMCPServer {
                 chunks_per_second: result.processingTimeMs > 0 ?
                     Math.round(result.chunksCreated / (result.processingTimeMs / 1000) * 100) / 100 : 0,
                 supported_file_types: mode === 'multimodal'
-                    ? ['md', 'txt', 'jpg', 'jpeg', 'png', 'gif', 'webp']
-                    : ['md', 'txt'],
+                    ? ['md', 'txt', 'mdx', 'pdf', 'docx', 'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp']
+                    : ['md', 'txt', 'mdx', 'pdf', 'docx'],
                 success: true
             };
             return {
@@ -1132,7 +1139,7 @@ class RagLiteMCPServer {
                 text_search: true,
                 image_search: false,
                 multimodal_reranking: false,
-                supported_file_types: ['md', 'txt']
+                supported_file_types: ['md', 'txt', 'mdx', 'pdf', 'docx']
             };
         }
         else if (systemInfo.mode === 'multimodal') {
@@ -1140,7 +1147,7 @@ class RagLiteMCPServer {
                 text_search: true,
                 image_search: true,
                 multimodal_reranking: true,
-                supported_file_types: ['md', 'txt', 'jpg', 'png', 'gif', 'webp']
+                supported_file_types: ['md', 'txt', 'mdx', 'pdf', 'docx', 'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp']
             };
         }
     }
```
package/dist/multimodal/clip-embedder.js
CHANGED

```diff
@@ -339,15 +339,13 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
             throw new Error('CLIP text model or tokenizer not initialized');
         }
         try {
-            // Validate and truncate text if necessary (CLIP has a 77 token limit)
-            this.validateTextLength(text);
-            const finalProcessedText = this.truncateText(processedText);
             // Use the validated CLIPTextModelWithProjection approach (no pixel_values errors)
             // Tokenize text with CLIP's requirements
-
+            // The tokenizer handles truncation at 77 TOKENS (not characters)
+            const tokens = await this.tokenizer(processedText, {
                 padding: true,
                 truncation: true,
-                max_length: 77, // CLIP's text sequence length limit
+                max_length: 77, // CLIP's text sequence length limit (77 tokens)
                 return_tensors: 'pt'
             });
             // Log token information for debugging (only in development)
@@ -355,7 +353,7 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
                 const tokenIds = tokens.input_ids?.data || [];
                 const actualTokenCount = Array.from(tokenIds).filter((id) => id !== 0).length;
                 if (actualTokenCount >= 77) {
-                    console.warn(`Text truncated: "${
+                    console.warn(`Text truncated by tokenizer: "${processedText.substring(0, 50)}..." (truncated to 77 tokens)`);
                 }
             }
             // Generate text embedding using CLIPTextModelWithProjection
@@ -389,15 +387,15 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
                 console.warn(`Warning: Embedding normalization may be imprecise (magnitude: ${magnitudeAfterNorm.toFixed(6)})`);
             }
             // Generate unique embedding ID
-            const embeddingId = this.generateEmbeddingId(
+            const embeddingId = this.generateEmbeddingId(processedText, 'text');
             return {
                 embedding_id: embeddingId,
                 vector: embedding,
                 contentType: 'text',
                 metadata: {
                     originalText: text,
-                    processedText:
-                    textLength:
+                    processedText: processedText,
+                    textLength: processedText.length,
                     embeddingMagnitudeBeforeNorm: magnitudeBeforeNorm,
                     embeddingMagnitudeAfterNorm: magnitudeAfterNorm,
                     normalized: true,
@@ -682,8 +680,9 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
             const { createTextBatchProcessor } = await import('../core/batch-processing-optimizer.js');
             const batchProcessor = createTextBatchProcessor();
             // Convert to EmbeddingBatchItem format
+            // Let tokenizer handle truncation at 77 tokens (not characters)
             const batchItems = textItems.map(item => ({
-                content:
+                content: item.content.trim(),
                 contentType: item.contentType,
                 metadata: item.metadata
             }));
@@ -773,7 +772,8 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
      */
     async processBatchText(textItems) {
         // Prepare texts for batch processing
-
+        // Let tokenizer handle truncation at 77 tokens (not characters)
+        const texts = textItems.map(item => item.content.trim());
         // Tokenize all texts in batch
         const tokensBatch = await Promise.all(texts.map(text => this.tokenizer(text, {
             padding: true,
```
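The substantive fix in this file is moving truncation out of character-counting helpers (`validateTextLength`, `truncateText`) and into the tokenizer, where the 77-token budget actually lives: 77 tokens is not 77 characters, and only the tokenizer knows where the boundary falls. A minimal sketch with transformers.js, mirroring the tokenizer call in the diff; the tokenizer checkpoint name is an assumption for illustration, and any CLIP tokenizer behaves the same way:

```ts
import { AutoTokenizer } from '@huggingface/transformers';

// Hypothetical CLIP tokenizer checkpoint for illustration
const tokenizer = await AutoTokenizer.from_pretrained('Xenova/clip-vit-base-patch32');

// 77 is a token budget, not a character budget: a long string of common words
// may fit, while a short string of rare words may not. Let the tokenizer decide.
const tokens = await tokenizer('a photo of a very fluffy cat sitting on a windowsill', {
  padding: true,
  truncation: true,
  max_length: 77,
});

console.log(tokens.input_ids.dims); // [1, n] with n capped at 77
```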
package/package.json
CHANGED