rag-lite-ts 2.0.5 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,22 @@ import { IngestionFactory } from './factories/ingestion-factory.js';
31
31
  import { getSystemInfo } from './core/db.js';
32
32
  import { DatabaseConnectionManager } from './core/database-connection-manager.js';
33
33
  import { config, validateCoreConfig, ConfigurationError } from './core/config.js';
34
/**
 * Extension -> MIME type table for the image formats returned as MCP image content.
 *
 * A Map is used instead of a plain object literal so that an "extension" such as
 * `constructor` or `__proto__` can never resolve to an inherited Object member
 * (which would hand a function/object to the caller instead of a MIME string).
 */
const IMAGE_MIME_TYPES = new Map([
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['png', 'image/png'],
    ['gif', 'image/gif'],
    ['webp', 'image/webp'],
    ['bmp', 'image/bmp'],
    ['svg', 'image/svg+xml']
]);
/** Fallback MIME type used when the extension is missing or not recognised. */
const DEFAULT_IMAGE_MIME_TYPE = 'image/jpeg';
/**
 * Detect MIME type from file path or extension
 *
 * The text after the last `.` in the path is lower-cased and looked up in the
 * table of known image extensions.
 *
 * @param {string} filePath - File path (or bare file name) of the image.
 * @returns {string} The matching image MIME type, or `image/jpeg` when the
 *   extension is unknown, absent, or `filePath` is not a string.
 */
function getMimeTypeFromPath(filePath) {
    // A missing/non-string source must not throw: callers wrap this in a
    // try/catch that would otherwise drop the image from the response.
    if (typeof filePath !== 'string') {
        return DEFAULT_IMAGE_MIME_TYPE;
    }
    // split() always yields at least one element, so pop() is never undefined.
    const ext = filePath.toLowerCase().split('.').pop();
    return IMAGE_MIME_TYPES.get(ext) || DEFAULT_IMAGE_MIME_TYPE; // Default to JPEG if unknown
}
34
50
  /**
35
51
  * MCP Server class that wraps RAG-lite TS functionality
36
52
  * Implements MCP protocol interface without creating REST/GraphQL endpoints
@@ -118,8 +134,8 @@ class RagLiteMCPServer {
118
134
  },
119
135
  rerank_strategy: {
120
136
  type: 'string',
121
- description: 'Reranking strategy for multimodal mode. Options: text-derived (default), metadata, hybrid, disabled',
122
- enum: ['text-derived', 'metadata', 'hybrid', 'disabled']
137
+ description: 'Reranking strategy for multimodal mode. Options: text-derived (default), disabled',
138
+ enum: ['text-derived', 'disabled']
123
139
  },
124
140
  force_rebuild: {
125
141
  type: 'boolean',
@@ -152,8 +168,8 @@ class RagLiteMCPServer {
152
168
  },
153
169
  rerank_strategy: {
154
170
  type: 'string',
155
- description: 'Reranking strategy for multimodal mode. Options: text-derived (default), metadata, hybrid, disabled',
156
- enum: ['text-derived', 'metadata', 'hybrid', 'disabled'],
171
+ description: 'Reranking strategy for multimodal mode. Options: text-derived (default), disabled',
172
+ enum: ['text-derived', 'disabled'],
157
173
  default: 'text-derived'
158
174
  },
159
175
  title: {
@@ -375,50 +391,60 @@ class RagLiteMCPServer {
375
391
  const startTime = Date.now();
376
392
  const results = await this.searchEngine.search(args.query, searchOptions);
377
393
  const searchTime = Date.now() - startTime;
378
- // Format results for MCP response with content type information
379
- const formattedResults = {
394
+ // Format results for MCP response with proper image content support
395
+ const textResults = {
380
396
  query: args.query,
381
397
  results_count: results.length,
382
398
  search_time_ms: searchTime,
383
- results: await Promise.all(results.map(async (result, index) => {
384
- const formattedResult = {
385
- rank: index + 1,
386
- score: Math.round(result.score * 100) / 100, // Round to 2 decimal places
387
- content_type: result.contentType,
388
- document: {
389
- id: result.document.id,
390
- title: result.document.title,
391
- source: result.document.source,
392
- content_type: result.document.contentType
393
- },
394
- text: result.content
395
- };
396
- // For image content, include base64-encoded image data for MCP clients
397
- if (result.contentType === 'image' && result.document.contentId) {
398
- try {
399
- const imageData = await this.searchEngine.getContent(result.document.contentId, 'base64');
400
- formattedResult.image_data = imageData;
401
- formattedResult.image_format = 'base64';
402
- }
403
- catch (error) {
404
- // If image retrieval fails, include error but don't fail the entire search
405
- formattedResult.image_error = error instanceof Error ? error.message : 'Failed to retrieve image';
406
- }
407
- }
408
- // Include metadata if available
409
- if (result.metadata) {
410
- formattedResult.metadata = result.metadata;
411
- }
412
- return formattedResult;
399
+ results: results.map((result, index) => ({
400
+ rank: index + 1,
401
+ score: Math.round(result.score * 100) / 100,
402
+ content_type: result.contentType,
403
+ document: {
404
+ id: result.document.id,
405
+ title: result.document.title,
406
+ source: result.document.source,
407
+ content_type: result.document.contentType
408
+ },
409
+ text: result.content,
410
+ metadata: result.metadata,
411
+ // Reference to image content if applicable
412
+ has_image: result.contentType === 'image' && !!result.document.contentId
413
413
  }))
414
414
  };
415
+ // Build MCP response content array
416
+ const responseContent = [
417
+ {
418
+ type: 'text',
419
+ text: JSON.stringify(textResults, null, 2)
420
+ }
421
+ ];
422
+ // Add proper MCP image content for each image result
423
+ for (const result of results) {
424
+ if (result.contentType === 'image' && result.document.contentId) {
425
+ try {
426
+ const imageData = await this.searchEngine.getContent(result.document.contentId, 'base64');
427
+ const mimeType = getMimeTypeFromPath(result.document.source);
428
+ responseContent.push({
429
+ type: 'image',
430
+ data: imageData,
431
+ mimeType: mimeType,
432
+ annotations: {
433
+ audience: ['user'],
434
+ priority: 0.8,
435
+ title: result.document.title,
436
+ source: result.document.source
437
+ }
438
+ });
439
+ }
440
+ catch (error) {
441
+ // If image retrieval fails, log but don't fail the entire search
442
+ console.error(`Failed to retrieve image for ${result.document.source}:`, error);
443
+ }
444
+ }
445
+ }
415
446
  return {
416
- content: [
417
- {
418
- type: 'text',
419
- text: JSON.stringify(formattedResults, null, 2),
420
- },
421
- ],
447
+ content: responseContent
422
448
  };
423
449
  }
424
450
  catch (error) {
@@ -549,7 +575,7 @@ class RagLiteMCPServer {
549
575
  if (mode === 'text') {
550
576
  throw new Error('Reranking strategy parameter is only supported in multimodal mode');
551
577
  }
552
- const validStrategies = ['text-derived', 'metadata', 'hybrid', 'disabled'];
578
+ const validStrategies = ['text-derived', 'disabled'];
553
579
  if (!validStrategies.includes(args.rerank_strategy)) {
554
580
  throw new Error(`Invalid reranking strategy: ${args.rerank_strategy}. Supported strategies: ${validStrategies.join(', ')}`);
555
581
  }
@@ -1223,48 +1249,61 @@ class RagLiteMCPServer {
1223
1249
  const startTime = Date.now();
1224
1250
  const results = await this.searchEngine.search(args.query, searchOptions);
1225
1251
  const searchTime = Date.now() - startTime;
1226
- // Format results for MCP response with content type information and image data
1227
- const formattedResults = {
1252
+ // Format results for MCP response with proper image content support
1253
+ const textResults = {
1228
1254
  query: args.query,
1229
1255
  content_type_filter: args.content_type || 'all',
1230
1256
  results_count: results.length,
1231
1257
  search_time_ms: searchTime,
1232
- results: await Promise.all(results.map(async (result, index) => {
1233
- const formattedResult = {
1234
- rank: index + 1,
1235
- score: Math.round(result.score * 100) / 100,
1236
- content_type: result.contentType,
1237
- document: {
1238
- id: result.document.id,
1239
- title: result.document.title,
1240
- source: result.document.source,
1241
- content_type: result.document.contentType
1242
- },
1243
- text: result.content,
1244
- metadata: result.metadata
1245
- };
1246
- // For image content, include base64-encoded image data for MCP clients
1247
- if (result.contentType === 'image' && result.document.contentId) {
1248
- try {
1249
- const imageData = await this.searchEngine.getContent(result.document.contentId, 'base64');
1250
- formattedResult.image_data = imageData;
1251
- formattedResult.image_format = 'base64';
1252
- }
1253
- catch (error) {
1254
- // If image retrieval fails, include error but don't fail the entire search
1255
- formattedResult.image_error = error instanceof Error ? error.message : 'Failed to retrieve image';
1256
- }
1257
- }
1258
- return formattedResult;
1258
+ results: results.map((result, index) => ({
1259
+ rank: index + 1,
1260
+ score: Math.round(result.score * 100) / 100,
1261
+ content_type: result.contentType,
1262
+ document: {
1263
+ id: result.document.id,
1264
+ title: result.document.title,
1265
+ source: result.document.source,
1266
+ content_type: result.document.contentType
1267
+ },
1268
+ text: result.content,
1269
+ metadata: result.metadata,
1270
+ // Reference to image content if applicable
1271
+ has_image: result.contentType === 'image' && !!result.document.contentId
1259
1272
  }))
1260
1273
  };
1274
+ // Build MCP response content array
1275
+ const responseContent = [
1276
+ {
1277
+ type: 'text',
1278
+ text: JSON.stringify(textResults, null, 2)
1279
+ }
1280
+ ];
1281
+ // Add proper MCP image content for each image result
1282
+ for (const result of results) {
1283
+ if (result.contentType === 'image' && result.document.contentId) {
1284
+ try {
1285
+ const imageData = await this.searchEngine.getContent(result.document.contentId, 'base64');
1286
+ const mimeType = getMimeTypeFromPath(result.document.source);
1287
+ responseContent.push({
1288
+ type: 'image',
1289
+ data: imageData,
1290
+ mimeType: mimeType,
1291
+ annotations: {
1292
+ audience: ['user'],
1293
+ priority: 0.8,
1294
+ title: result.document.title,
1295
+ source: result.document.source
1296
+ }
1297
+ });
1298
+ }
1299
+ catch (error) {
1300
+ // If image retrieval fails, log but don't fail the entire search
1301
+ console.error(`Failed to retrieve image for ${result.document.source}:`, error);
1302
+ }
1303
+ }
1304
+ }
1261
1305
  return {
1262
- content: [
1263
- {
1264
- type: 'text',
1265
- text: JSON.stringify(formattedResults, null, 2),
1266
- },
1267
- ],
1306
+ content: responseContent
1268
1307
  };
1269
1308
  }
1270
1309
  catch (error) {
@@ -1418,23 +1457,6 @@ class RagLiteMCPServer {
1418
1457
  strategyInfo.accuracy = 'high';
1419
1458
  strategyInfo.use_cases = ['Mixed content with images', 'Visual documentation', 'Diagrams and charts'];
1420
1459
  break;
1421
- case 'metadata':
1422
- strategyInfo.description = 'Uses file metadata, filenames, and content properties for scoring without model inference';
1423
- strategyInfo.requirements = ['None - uses file system metadata only'];
1424
- strategyInfo.supported_content_types = ['text', 'image', 'pdf', 'docx'];
1425
- strategyInfo.performance_impact = 'low';
1426
- strategyInfo.accuracy = 'medium';
1427
- strategyInfo.use_cases = ['Fast retrieval', 'Filename-based search', 'Content type filtering'];
1428
- break;
1429
- case 'hybrid':
1430
- strategyInfo.description = 'Combines multiple reranking signals (semantic + metadata) with configurable weights';
1431
- strategyInfo.requirements = ['Text-derived reranker', 'Metadata reranker'];
1432
- strategyInfo.supported_content_types = ['text', 'image', 'pdf', 'docx'];
1433
- strategyInfo.performance_impact = 'high';
1434
- strategyInfo.accuracy = 'very high';
1435
- strategyInfo.use_cases = ['Best overall accuracy', 'Complex multimodal collections', 'Production systems'];
1436
- strategyInfo.default_weights = { semantic: 0.7, metadata: 0.3 };
1437
- break;
1438
1460
  case 'disabled':
1439
1461
  strategyInfo.description = 'No reranking applied - results ordered by vector similarity scores only';
1440
1462
  strategyInfo.requirements = ['None'];
@@ -1455,8 +1477,8 @@ class RagLiteMCPServer {
1455
1477
  strategies_by_mode: strategiesByMode,
1456
1478
  recommendations: {
1457
1479
  text_mode: 'Use cross-encoder for best accuracy, disabled for best performance',
1458
- multimodal_mode: 'Use hybrid for best accuracy, text-derived for good balance, metadata for fast retrieval',
1459
- development: 'Start with disabled or metadata for fast iteration, upgrade to cross-encoder/text-derived for production'
1480
+ multimodal_mode: 'Use text-derived for best accuracy, disabled for best performance',
1481
+ development: 'Start with disabled for fast iteration, upgrade to cross-encoder/text-derived for production'
1460
1482
  }
1461
1483
  };
1462
1484
  return {
@@ -1505,16 +1527,16 @@ class RagLiteMCPServer {
1505
1527
  const db = await DatabaseConnectionManager.getConnection(config.db_file);
1506
1528
  try {
1507
1529
  // Get document count by content type
1508
- const docsByType = await db.all(`
1509
- SELECT content_type, COUNT(*) as count
1510
- FROM documents
1511
- GROUP BY content_type
1530
+ const docsByType = await db.all(`
1531
+ SELECT content_type, COUNT(*) as count
1532
+ FROM documents
1533
+ GROUP BY content_type
1512
1534
  `);
1513
1535
  // Get chunk count by content type
1514
- const chunksByType = await db.all(`
1515
- SELECT content_type, COUNT(*) as count
1516
- FROM chunks
1517
- GROUP BY content_type
1536
+ const chunksByType = await db.all(`
1537
+ SELECT content_type, COUNT(*) as count
1538
+ FROM chunks
1539
+ GROUP BY content_type
1518
1540
  `);
1519
1541
  enhancedStats.content_breakdown = {
1520
1542
  documents_by_type: docsByType.reduce((acc, row) => {
@@ -338,76 +338,73 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
338
338
  if (!this.textModel || !this.tokenizer) {
339
339
  throw new Error('CLIP text model or tokenizer not initialized');
340
340
  }
341
- try {
342
- // Use the validated CLIPTextModelWithProjection approach (no pixel_values errors)
343
- // Tokenize text with CLIP's requirements
344
- // The tokenizer handles truncation at 77 TOKENS (not characters)
345
- const tokens = await this.tokenizer(processedText, {
346
- padding: true,
347
- truncation: true,
348
- max_length: 77, // CLIP's text sequence length limit (77 tokens)
349
- return_tensors: 'pt'
350
- });
351
- // Log token information for debugging (only in development)
352
- if (process.env.NODE_ENV === 'development') {
353
- const tokenIds = tokens.input_ids?.data || [];
354
- const actualTokenCount = Array.from(tokenIds).filter((id) => id !== 0).length;
355
- if (actualTokenCount >= 77) {
356
- console.warn(`Text truncated by tokenizer: "${processedText.substring(0, 50)}..." (truncated to 77 tokens)`);
357
- }
358
- }
359
- // Generate text embedding using CLIPTextModelWithProjection
360
- const output = await this.textModel(tokens);
361
- // Extract embedding from text_embeds (no pixel_values dependency)
362
- const embedding = new Float32Array(output.text_embeds.data);
363
- // Validate embedding dimensions and values
364
- if (embedding.length !== this.dimensions) {
365
- throw new Error(`CLIP embedding dimension mismatch: expected ${this.dimensions}, got ${embedding.length}`);
366
- }
367
- // Validate that all values are finite numbers
368
- const invalidValues = Array.from(embedding).filter(val => !isFinite(val) || isNaN(val));
369
- if (invalidValues.length > 0) {
370
- throw new Error(`CLIP embedding contains ${invalidValues.length} invalid values`);
371
- }
372
- // Validate embedding quality - should not be all zeros
373
- const nonZeroValues = Array.from(embedding).filter(val => Math.abs(val) > 1e-8);
374
- if (nonZeroValues.length === 0) {
375
- throw new Error('CLIP embedding is all zeros');
376
- }
377
- // Calculate embedding magnitude before normalization for quality assessment
378
- const magnitudeBeforeNorm = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
379
- if (magnitudeBeforeNorm < 1e-6) {
380
- throw new Error(`CLIP embedding has critically low magnitude: ${magnitudeBeforeNorm.toExponential(3)}`);
381
- }
382
- // Apply L2-normalization (CLIP models are trained with normalized embeddings)
383
- this.normalizeEmbedding(embedding);
384
- // Verify normalization was successful
385
- const magnitudeAfterNorm = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
386
- if (Math.abs(magnitudeAfterNorm - 1.0) > 0.01) {
387
- console.warn(`Warning: Embedding normalization may be imprecise (magnitude: ${magnitudeAfterNorm.toFixed(6)})`);
341
+ // Use the validated CLIPTextModelWithProjection approach (no pixel_values errors)
342
+ // Tokenize text with CLIP's requirements
343
+ // The tokenizer handles truncation at 77 TOKENS (not characters)
344
+ const tokens = await this.tokenizer(processedText, {
345
+ padding: true,
346
+ truncation: true,
347
+ max_length: 77, // CLIP's text sequence length limit (77 tokens)
348
+ return_tensors: 'pt'
349
+ });
350
+ // Log token information for debugging (only in development)
351
+ if (process.env.NODE_ENV === 'development') {
352
+ const tokenIds = tokens.input_ids?.data || [];
353
+ const actualTokenCount = Array.from(tokenIds).filter((id) => id !== 0).length;
354
+ if (actualTokenCount >= 77) {
355
+ console.warn(`Text truncated by tokenizer: "${processedText.substring(0, 50)}..." (truncated to 77 tokens)`);
356
+ }
357
+ }
358
+ // Generate text embedding using CLIPTextModelWithProjection
359
+ const output = await this.textModel(tokens);
360
+ // Extract embedding from text_embeds (no pixel_values dependency)
361
+ const embedding = new Float32Array(output.text_embeds.data);
362
+ // Validate embedding dimensions and values
363
+ if (embedding.length !== this.dimensions) {
364
+ throw new Error(`CLIP embedding dimension mismatch: expected ${this.dimensions}, got ${embedding.length}`);
365
+ }
366
+ // Validate that all values are finite numbers
367
+ const invalidValues = Array.from(embedding).filter(val => !isFinite(val) || isNaN(val));
368
+ if (invalidValues.length > 0) {
369
+ throw new Error(`CLIP embedding contains ${invalidValues.length} invalid values`);
370
+ }
371
+ // Validate embedding quality - should not be all zeros
372
+ const nonZeroValues = Array.from(embedding).filter(val => Math.abs(val) > 1e-8);
373
+ if (nonZeroValues.length === 0) {
374
+ throw new Error('CLIP embedding is all zeros');
375
+ }
376
+ // Calculate embedding magnitude before normalization for quality assessment
377
+ const magnitudeBeforeNorm = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
378
+ if (magnitudeBeforeNorm < 1e-6) {
379
+ throw new Error(`CLIP embedding has critically low magnitude: ${magnitudeBeforeNorm.toExponential(3)}`);
380
+ }
381
+ // Apply L2-normalization (CLIP models are trained with normalized embeddings)
382
+ this.normalizeEmbedding(embedding);
383
+ // Verify normalization was successful
384
+ const magnitudeAfterNorm = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
385
+ if (Math.abs(magnitudeAfterNorm - 1.0) > 0.01) {
386
+ console.warn(`Warning: Embedding normalization may be imprecise (magnitude: ${magnitudeAfterNorm.toFixed(6)})`);
387
+ }
388
+ // Log text embedding generation
389
+ console.log(`[CLIP] Generated text embedding for: "${processedText.substring(0, 30)}${processedText.length > 30 ? '...' : ''}"`);
390
+ // Generate unique embedding ID
391
+ const embeddingId = this.generateEmbeddingId(processedText, 'text');
392
+ return {
393
+ embedding_id: embeddingId,
394
+ vector: embedding,
395
+ contentType: 'text',
396
+ metadata: {
397
+ originalText: text,
398
+ processedText: processedText,
399
+ textLength: processedText.length,
400
+ embeddingMagnitudeBeforeNorm: magnitudeBeforeNorm,
401
+ embeddingMagnitudeAfterNorm: magnitudeAfterNorm,
402
+ normalized: true,
403
+ modelName: this.modelName,
404
+ modelType: this.modelType,
405
+ dimensions: this.dimensions
388
406
  }
389
- // Generate unique embedding ID
390
- const embeddingId = this.generateEmbeddingId(processedText, 'text');
391
- return {
392
- embedding_id: embeddingId,
393
- vector: embedding,
394
- contentType: 'text',
395
- metadata: {
396
- originalText: text,
397
- processedText: processedText,
398
- textLength: processedText.length,
399
- embeddingMagnitudeBeforeNorm: magnitudeBeforeNorm,
400
- embeddingMagnitudeAfterNorm: magnitudeAfterNorm,
401
- normalized: true,
402
- modelName: this.modelName,
403
- modelType: this.modelType,
404
- dimensions: this.dimensions
405
- }
406
- };
407
- }
408
- catch (error) {
409
- throw error;
410
- }
407
+ };
411
408
  }
412
409
  // =============================================================================
413
410
  // IMAGE EMBEDDING METHODS
@@ -602,9 +599,11 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
602
599
  const absolutePath = path.resolve(imagePath);
603
600
  // Try to use Sharp for better Node.js support
604
601
  try {
605
- const sharp = await import('sharp');
602
+ const sharpModule = await import('sharp');
603
+ const sharp = sharpModule.default;
604
+ sharp.concurrency(2);
606
605
  // Use Sharp to load and get raw pixel data
607
- const { data, info } = await sharp.default(absolutePath)
606
+ const { data, info } = await sharp(absolutePath)
608
607
  .resize(variant.imageSize, variant.imageSize, {
609
608
  fit: 'cover',
610
609
  position: 'center'
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rag-lite-ts",
3
- "version": "2.0.5",
3
+ "version": "2.1.0",
4
4
  "description": "Local-first TypeScript retrieval engine with Chameleon Multimodal Architecture for semantic search over text and image content",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",