rag-lite-ts 2.0.2 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -0
- package/dist/cli/indexer.js +21 -2
- package/dist/cli.js +2 -2
- package/dist/factories/text-factory.js +32 -18
- package/dist/indexer.js +5 -2
- package/dist/mcp-server.js +16 -9
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -438,6 +438,33 @@ Now Claude can search your docs directly! Works with any MCP-compatible AI tool.
|
|
|
438
438
|
</tr>
|
|
439
439
|
</table>
|
|
440
440
|
|
|
441
|
+
### 📁 Supported File Formats
|
|
442
|
+
|
|
443
|
+
RAG-lite TS supports the following file formats with full processing implementations:
|
|
444
|
+
|
|
445
|
+
**Text Mode:**
|
|
446
|
+
- Markdown: `.md`, `.mdx`
|
|
447
|
+
- Plain text: `.txt`
|
|
448
|
+
- Documents: `.pdf`, `.docx`
|
|
449
|
+
|
|
450
|
+
**Multimodal Mode** (includes all text formats plus):
|
|
451
|
+
- Images: `.jpg`, `.jpeg`, `.png`, `.gif`, `.webp`, `.bmp`
|
|
452
|
+
|
|
453
|
+
All formats work seamlessly with both single file and directory ingestion:
|
|
454
|
+
|
|
455
|
+
```bash
|
|
456
|
+
# Single file ingestion
|
|
457
|
+
raglite ingest ./document.pdf
|
|
458
|
+
raglite ingest ./readme.md
|
|
459
|
+
raglite ingest ./notes.txt
|
|
460
|
+
|
|
461
|
+
# Directory ingestion (processes all supported formats)
|
|
462
|
+
raglite ingest ./docs/
|
|
463
|
+
|
|
464
|
+
# Multimodal ingestion (includes images)
|
|
465
|
+
raglite ingest ./mixed-content/ --mode multimodal
|
|
466
|
+
```
|
|
467
|
+
|
|
441
468
|
## 🔧 How It Works
|
|
442
469
|
|
|
443
470
|
RAG-lite TS follows a clean, efficient pipeline:
|
package/dist/cli/indexer.js
CHANGED
|
@@ -148,12 +148,31 @@ export async function runIngest(path, options = {}) {
|
|
|
148
148
|
const pathType = stats.isDirectory() ? 'directory' : 'file';
|
|
149
149
|
// Validate file type for single files
|
|
150
150
|
if (stats.isFile()) {
|
|
151
|
-
const
|
|
151
|
+
const mode = options.mode || 'text';
|
|
152
|
+
// Only formats with actual processing implementations
|
|
153
|
+
const textExtensions = ['.md', '.txt', '.mdx', '.pdf', '.docx'];
|
|
154
|
+
const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'];
|
|
155
|
+
const validExtensions = mode === 'multimodal'
|
|
156
|
+
? [...textExtensions, ...imageExtensions]
|
|
157
|
+
: textExtensions;
|
|
152
158
|
const hasValidExtension = validExtensions.some(ext => path.toLowerCase().endsWith(ext));
|
|
153
159
|
if (!hasValidExtension) {
|
|
154
160
|
console.error(`Error: Unsupported file type: ${path}`);
|
|
155
161
|
console.error('');
|
|
156
|
-
|
|
162
|
+
if (mode === 'multimodal') {
|
|
163
|
+
console.error('Supported file types in multimodal mode:');
|
|
164
|
+
console.error(' Text: .md, .txt, .mdx');
|
|
165
|
+
console.error(' Documents: .pdf, .docx');
|
|
166
|
+
console.error(' Images: .jpg, .jpeg, .png, .gif, .webp, .bmp');
|
|
167
|
+
}
|
|
168
|
+
else {
|
|
169
|
+
console.error('Supported file types in text mode:');
|
|
170
|
+
console.error(' Text: .md, .txt, .mdx');
|
|
171
|
+
console.error(' Documents: .pdf, .docx');
|
|
172
|
+
console.error('');
|
|
173
|
+
console.error('For image files, use --mode multimodal:');
|
|
174
|
+
console.error(' raglite ingest <path> --mode multimodal');
|
|
175
|
+
}
|
|
157
176
|
console.error('');
|
|
158
177
|
console.error('If you want to ingest multiple files, provide a directory path instead.');
|
|
159
178
|
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
package/dist/cli.js
CHANGED
|
@@ -26,7 +26,7 @@ Commands:
|
|
|
26
26
|
help Show this help message
|
|
27
27
|
|
|
28
28
|
Examples:
|
|
29
|
-
raglite ingest ./docs/ # Ingest all .md/.txt files in docs/
|
|
29
|
+
raglite ingest ./docs/ # Ingest all .md/.txt/.docx/.pdf files in docs/
|
|
30
30
|
raglite ingest ./readme.md # Ingest single file
|
|
31
31
|
raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2 # Use higher quality model
|
|
32
32
|
raglite ingest ./docs/ --mode multimodal # Enable multimodal processing
|
|
@@ -126,7 +126,7 @@ function validateArgs(command, args, options) {
|
|
|
126
126
|
console.error('Usage: raglite ingest <path>');
|
|
127
127
|
console.error('');
|
|
128
128
|
console.error('Examples:');
|
|
129
|
-
console.error(' raglite ingest ./docs/ # Ingest all .md/.txt files in docs/');
|
|
129
|
+
console.error(' raglite ingest ./docs/ # Ingest all .md/.txt/.docx/.pdf files in docs/');
|
|
130
130
|
console.error(' raglite ingest ./readme.md # Ingest single file');
|
|
131
131
|
console.error(' raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2 # Use higher quality model');
|
|
132
132
|
console.error(' raglite ingest ./docs/ --mode multimodal # Enable multimodal processing');
|
package/dist/factories/text-factory.js
CHANGED
|
@@ -421,18 +421,35 @@ export class TextIngestionFactory {
|
|
|
421
421
|
console.log(`📁 Creating index directory: ${indexDir}`);
|
|
422
422
|
mkdirSync(indexDir, { recursive: true });
|
|
423
423
|
}
|
|
424
|
-
// Step 1:
|
|
425
|
-
const
|
|
424
|
+
// Step 1: Determine effective mode and select appropriate default model
|
|
425
|
+
const effectiveMode = options.mode || 'text';
|
|
426
|
+
// Step 1.5: Select model based on mode if not explicitly provided
|
|
427
|
+
let effectiveModel;
|
|
428
|
+
if (options.embeddingModel) {
|
|
429
|
+
// Use explicitly provided model
|
|
430
|
+
effectiveModel = options.embeddingModel;
|
|
431
|
+
}
|
|
432
|
+
else {
|
|
433
|
+
// Select default model based on mode
|
|
434
|
+
if (effectiveMode === 'multimodal') {
|
|
435
|
+
const { DEFAULT_MODELS } = await import('../core/model-registry.js');
|
|
436
|
+
effectiveModel = DEFAULT_MODELS['clip'];
|
|
437
|
+
console.log(`📊 No model specified for multimodal mode, using default: ${effectiveModel}`);
|
|
438
|
+
}
|
|
439
|
+
else {
|
|
440
|
+
effectiveModel = config.embedding_model;
|
|
441
|
+
}
|
|
442
|
+
}
|
|
443
|
+
// Step 2: Get model-specific defaults and merge with options
|
|
444
|
+
const modelDefaults = getModelDefaults(effectiveModel);
|
|
426
445
|
const effectiveBatchSize = options.batchSize ?? modelDefaults.batch_size;
|
|
427
446
|
const effectiveChunkSize = options.chunkSize ?? modelDefaults.chunk_size;
|
|
428
447
|
const effectiveChunkOverlap = options.chunkOverlap ?? modelDefaults.chunk_overlap;
|
|
429
|
-
// Step
|
|
430
|
-
const effectiveMode = options.mode || 'text';
|
|
431
|
-
const effectiveModel = options.embeddingModel || config.embedding_model;
|
|
448
|
+
// Step 3: Validate mode-model compatibility at creation time
|
|
432
449
|
console.log('🔍 Validating mode-model compatibility...');
|
|
433
450
|
validateModeModelCompatibilityOrThrow(effectiveMode, effectiveModel);
|
|
434
451
|
console.log('✓ Mode-model compatibility validated');
|
|
435
|
-
// Step
|
|
452
|
+
// Step 4: Initialize embedding function based on mode
|
|
436
453
|
let embedFn;
|
|
437
454
|
if (effectiveMode === 'multimodal') {
|
|
438
455
|
console.log('📊 Loading CLIP embedding model for multimodal mode...');
|
|
@@ -463,10 +480,10 @@ export class TextIngestionFactory {
|
|
|
463
480
|
await initializeSchema(db);
|
|
464
481
|
console.log('✓ Database connection established');
|
|
465
482
|
// Step 3.1: Handle mode storage during ingestion
|
|
466
|
-
await this.handleModeStorage(db, options, modelDefaults);
|
|
467
|
-
// Step
|
|
483
|
+
await this.handleModeStorage(db, options, modelDefaults, effectiveModel);
|
|
484
|
+
// Step 5: Initialize index manager
|
|
468
485
|
console.log('📇 Initializing vector index...');
|
|
469
|
-
const indexManager = new IndexManager(indexPath, dbPath, modelDefaults.dimensions,
|
|
486
|
+
const indexManager = new IndexManager(indexPath, dbPath, modelDefaults.dimensions, effectiveModel);
|
|
470
487
|
// Check if we need to force recreation due to model change
|
|
471
488
|
let forceRecreate = false;
|
|
472
489
|
if (options.forceRebuild && existsSync(indexPath) && existsSync(dbPath)) {
|
|
@@ -477,9 +494,8 @@ export class TextIngestionFactory {
|
|
|
477
494
|
const tempDb = await openDatabase(dbPath);
|
|
478
495
|
try {
|
|
479
496
|
const storedModel = await getStoredModelInfo(tempDb);
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
console.log(`🔄 Model change detected: ${storedModel.modelName} → ${currentModel}`);
|
|
497
|
+
if (storedModel && storedModel.modelName !== effectiveModel) {
|
|
498
|
+
console.log(`🔄 Model change detected: ${storedModel.modelName} → ${effectiveModel}`);
|
|
483
499
|
console.log(`🔄 Dimensions change: ${storedModel.dimensions} → ${modelDefaults.dimensions}`);
|
|
484
500
|
}
|
|
485
501
|
else if (storedModel && storedModel.dimensions !== modelDefaults.dimensions) {
|
|
@@ -503,9 +519,8 @@ export class TextIngestionFactory {
|
|
|
503
519
|
// Update stored model info when rebuilding or creating new index
|
|
504
520
|
if (options.forceRebuild || forceRecreate) {
|
|
505
521
|
const { setStoredModelInfo } = await import('../core/db.js');
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
console.log(`✓ Updated stored model info: ${currentModel} (${modelDefaults.dimensions} dimensions)`);
|
|
522
|
+
await setStoredModelInfo(db, effectiveModel, modelDefaults.dimensions);
|
|
523
|
+
console.log(`✓ Updated stored model info: ${effectiveModel} (${modelDefaults.dimensions} dimensions)`);
|
|
509
524
|
}
|
|
510
525
|
}
|
|
511
526
|
else {
|
|
@@ -555,11 +570,10 @@ export class TextIngestionFactory {
|
|
|
555
570
|
* Creates or validates system info based on the provided mode and options
|
|
556
571
|
* @private
|
|
557
572
|
*/
|
|
558
|
-
static async handleModeStorage(db, options, modelDefaults) {
|
|
573
|
+
static async handleModeStorage(db, options, modelDefaults, effectiveModel) {
|
|
559
574
|
const { getSystemInfo, setSystemInfo } = await import('../core/db.js');
|
|
560
|
-
// Determine the effective mode and
|
|
575
|
+
// Determine the effective mode and reranking strategy
|
|
561
576
|
const effectiveMode = options.mode || 'text';
|
|
562
|
-
const effectiveModel = options.embeddingModel || config.embedding_model;
|
|
563
577
|
const effectiveRerankingStrategy = options.rerankingStrategy || 'cross-encoder';
|
|
564
578
|
// Determine model type based on model name
|
|
565
579
|
let modelType;
|
package/dist/indexer.js
CHANGED
|
@@ -16,11 +16,14 @@ async function main() {
|
|
|
16
16
|
console.error(' <path> File or directory path to ingest (.md, .txt, .mdx, .pdf, .docx files)');
|
|
17
17
|
console.error('');
|
|
18
18
|
console.error('Examples:');
|
|
19
|
-
console.error(' node indexer.js ./docs/ # Ingest all .md/.txt files in docs/');
|
|
19
|
+
console.error(' node indexer.js ./docs/ # Ingest all .md/.txt/.pdf/.docx files in docs/');
|
|
20
20
|
console.error(' node indexer.js ./readme.md # Ingest single file');
|
|
21
21
|
console.error(' node indexer.js ../project/docs/ # Ingest from parent directory');
|
|
22
22
|
console.error('');
|
|
23
|
-
console.error('Supported file types:
|
|
23
|
+
console.error('Supported file types:');
|
|
24
|
+
console.error(' Text: .md, .txt, .mdx');
|
|
25
|
+
console.error(' Documents: .pdf, .docx');
|
|
26
|
+
console.error(' Images (multimodal mode): .jpg, .jpeg, .png, .gif, .webp, .bmp');
|
|
24
27
|
console.error('');
|
|
25
28
|
console.error('After ingestion, use: node search.js "your query"');
|
|
26
29
|
process.exit(EXIT_CODES.INVALID_ARGUMENTS);
|
package/dist/mcp-server.js
CHANGED
|
@@ -501,16 +501,23 @@ class RagLiteMCPServer {
|
|
|
501
501
|
catch (error) {
|
|
502
502
|
throw new Error(`Cannot access path: ${args.path}. Check permissions.`);
|
|
503
503
|
}
|
|
504
|
-
// Validate
|
|
504
|
+
// Validate mode parameter
|
|
505
|
+
const mode = args.mode || 'text';
|
|
506
|
+
// Validate file type for single files (only formats with actual processing implementations)
|
|
505
507
|
if (stats.isFile()) {
|
|
506
|
-
const
|
|
508
|
+
const textExtensions = ['.md', '.txt', '.mdx', '.pdf', '.docx'];
|
|
509
|
+
const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'];
|
|
510
|
+
const validExtensions = mode === 'multimodal'
|
|
511
|
+
? [...textExtensions, ...imageExtensions]
|
|
512
|
+
: textExtensions;
|
|
507
513
|
const hasValidExtension = validExtensions.some(ext => args.path.toLowerCase().endsWith(ext));
|
|
508
514
|
if (!hasValidExtension) {
|
|
509
|
-
|
|
515
|
+
const supportedTypes = mode === 'multimodal'
|
|
516
|
+
? '.md, .txt, .mdx, .pdf, .docx, .jpg, .jpeg, .png, .gif, .webp, .bmp'
|
|
517
|
+
: '.md, .txt, .mdx, .pdf, .docx';
|
|
518
|
+
throw new Error(`Unsupported file type: ${args.path}. Supported types: ${supportedTypes}`);
|
|
510
519
|
}
|
|
511
520
|
}
|
|
512
|
-
// Validate mode parameter
|
|
513
|
-
const mode = args.mode || 'text';
|
|
514
521
|
if (!['text', 'multimodal'].includes(mode)) {
|
|
515
522
|
throw new Error(`Invalid mode: ${mode}. Supported modes: text, multimodal`);
|
|
516
523
|
}
|
|
@@ -585,8 +592,8 @@ class RagLiteMCPServer {
|
|
|
585
592
|
chunks_per_second: result.processingTimeMs > 0 ?
|
|
586
593
|
Math.round(result.chunksCreated / (result.processingTimeMs / 1000) * 100) / 100 : 0,
|
|
587
594
|
supported_file_types: mode === 'multimodal'
|
|
588
|
-
? ['md', 'txt', 'jpg', 'jpeg', 'png', 'gif', 'webp']
|
|
589
|
-
: ['md', 'txt'],
|
|
595
|
+
? ['md', 'txt', 'mdx', 'pdf', 'docx', 'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp']
|
|
596
|
+
: ['md', 'txt', 'mdx', 'pdf', 'docx'],
|
|
590
597
|
success: true
|
|
591
598
|
};
|
|
592
599
|
return {
|
|
@@ -1132,7 +1139,7 @@ class RagLiteMCPServer {
|
|
|
1132
1139
|
text_search: true,
|
|
1133
1140
|
image_search: false,
|
|
1134
1141
|
multimodal_reranking: false,
|
|
1135
|
-
supported_file_types: ['md', 'txt']
|
|
1142
|
+
supported_file_types: ['md', 'txt', 'mdx', 'pdf', 'docx']
|
|
1136
1143
|
};
|
|
1137
1144
|
}
|
|
1138
1145
|
else if (systemInfo.mode === 'multimodal') {
|
|
@@ -1140,7 +1147,7 @@ class RagLiteMCPServer {
|
|
|
1140
1147
|
text_search: true,
|
|
1141
1148
|
image_search: true,
|
|
1142
1149
|
multimodal_reranking: true,
|
|
1143
|
-
supported_file_types: ['md', 'txt', 'jpg', 'png', 'gif', 'webp']
|
|
1150
|
+
supported_file_types: ['md', 'txt', 'mdx', 'pdf', 'docx', 'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp']
|
|
1144
1151
|
};
|
|
1145
1152
|
}
|
|
1146
1153
|
}
|
package/package.json
CHANGED