rag-lite-ts 2.0.5 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +815 -808
- package/dist/cli/indexer.js +2 -38
- package/dist/cli/search.d.ts +1 -1
- package/dist/cli/search.js +118 -9
- package/dist/cli.js +77 -94
- package/dist/core/db.js +173 -173
- package/dist/core/ingestion.js +47 -9
- package/dist/core/lazy-dependency-loader.d.ts +3 -8
- package/dist/core/lazy-dependency-loader.js +11 -29
- package/dist/core/mode-detection-service.js +1 -1
- package/dist/core/reranking-config.d.ts +1 -1
- package/dist/core/reranking-config.js +7 -16
- package/dist/core/reranking-factory.js +3 -184
- package/dist/core/reranking-strategies.js +5 -4
- package/dist/core/search.d.ts +10 -0
- package/dist/core/search.js +34 -11
- package/dist/factories/ingestion-factory.js +3 -1
- package/dist/mcp-server.js +127 -105
- package/dist/multimodal/clip-embedder.js +70 -71
- package/package.json +1 -1
package/dist/mcp-server.js
CHANGED
```diff
@@ -31,6 +31,22 @@ import { IngestionFactory } from './factories/ingestion-factory.js';
 import { getSystemInfo } from './core/db.js';
 import { DatabaseConnectionManager } from './core/database-connection-manager.js';
 import { config, validateCoreConfig, ConfigurationError } from './core/config.js';
+/**
+ * Detect MIME type from file path or extension
+ */
+function getMimeTypeFromPath(filePath) {
+    const ext = filePath.toLowerCase().split('.').pop() || '';
+    const mimeTypes = {
+        'jpg': 'image/jpeg',
+        'jpeg': 'image/jpeg',
+        'png': 'image/png',
+        'gif': 'image/gif',
+        'webp': 'image/webp',
+        'bmp': 'image/bmp',
+        'svg': 'image/svg+xml'
+    };
+    return mimeTypes[ext] || 'image/jpeg'; // Default to JPEG if unknown
+}
 /**
  * MCP Server class that wraps RAG-lite TS functionality
  * Implements MCP protocol interface without creating REST/GraphQL endpoints
```
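A quick, hypothetical illustration of the helper added above (the paths are invented): the extension is lower-cased before lookup, and anything unrecognised falls back to `image/jpeg`.

```js
// Illustrative calls only; getMimeTypeFromPath is the helper added in the hunk above.
console.log(getMimeTypeFromPath('docs/Architecture.PNG')); // 'image/png'  (extension lower-cased before lookup)
console.log(getMimeTypeFromPath('reports/summary.pdf'));   // 'image/jpeg' (unknown extension -> JPEG fallback)
```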
```diff
@@ -118,8 +134,8 @@ class RagLiteMCPServer {
                 },
                 rerank_strategy: {
                     type: 'string',
-                    description: 'Reranking strategy for multimodal mode. Options: text-derived (default), …
-                    enum: ['text-derived', '…
+                    description: 'Reranking strategy for multimodal mode. Options: text-derived (default), disabled',
+                    enum: ['text-derived', 'disabled']
                 },
                 force_rebuild: {
                     type: 'boolean',
```
```diff
@@ -152,8 +168,8 @@ class RagLiteMCPServer {
                 },
                 rerank_strategy: {
                     type: 'string',
-                    description: 'Reranking strategy for multimodal mode. Options: text-derived (default), …
-                    enum: ['text-derived', '…
+                    description: 'Reranking strategy for multimodal mode. Options: text-derived (default), disabled',
+                    enum: ['text-derived', 'disabled'],
                     default: 'text-derived'
                 },
                 title: {
```
```diff
@@ -375,50 +391,60 @@ class RagLiteMCPServer {
             const startTime = Date.now();
             const results = await this.searchEngine.search(args.query, searchOptions);
             const searchTime = Date.now() - startTime;
-            // Format results for MCP response with content
-            const …
+            // Format results for MCP response with proper image content support
+            const textResults = {
                 query: args.query,
                 results_count: results.length,
                 search_time_ms: searchTime,
-                results: …
-                … (removed lines not rendered in the registry diff view)
-                if (result.contentType === 'image' && result.document.contentId) {
-                    try {
-                        const imageData = await this.searchEngine.getContent(result.document.contentId, 'base64');
-                        formattedResult.image_data = imageData;
-                        formattedResult.image_format = 'base64';
-                    }
-                    catch (error) {
-                        // If image retrieval fails, include error but don't fail the entire search
-                        formattedResult.image_error = error instanceof Error ? error.message : 'Failed to retrieve image';
-                    }
-                }
-                // Include metadata if available
-                if (result.metadata) {
-                    formattedResult.metadata = result.metadata;
-                }
-                return formattedResult;
+                results: results.map((result, index) => ({
+                    rank: index + 1,
+                    score: Math.round(result.score * 100) / 100,
+                    content_type: result.contentType,
+                    document: {
+                        id: result.document.id,
+                        title: result.document.title,
+                        source: result.document.source,
+                        content_type: result.document.contentType
+                    },
+                    text: result.content,
+                    metadata: result.metadata,
+                    // Reference to image content if applicable
+                    has_image: result.contentType === 'image' && !!result.document.contentId
                 }))
             };
+            // Build MCP response content array
+            const responseContent = [
+                {
+                    type: 'text',
+                    text: JSON.stringify(textResults, null, 2)
+                }
+            ];
+            // Add proper MCP image content for each image result
+            for (const result of results) {
+                if (result.contentType === 'image' && result.document.contentId) {
+                    try {
+                        const imageData = await this.searchEngine.getContent(result.document.contentId, 'base64');
+                        const mimeType = getMimeTypeFromPath(result.document.source);
+                        responseContent.push({
+                            type: 'image',
+                            data: imageData,
+                            mimeType: mimeType,
+                            annotations: {
+                                audience: ['user'],
+                                priority: 0.8,
+                                title: result.document.title,
+                                source: result.document.source
+                            }
+                        });
+                    }
+                    catch (error) {
+                        // If image retrieval fails, log but don't fail the entire search
+                        console.error(`Failed to retrieve image for ${result.document.source}:`, error);
+                    }
+                }
+            }
             return {
-                content: …
-                    {
-                        type: 'text',
-                        text: JSON.stringify(formattedResults, null, 2),
-                    },
-                ],
+                content: responseContent
             };
         }
         catch (error) {
```
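With this hunk, image hits are returned as first-class MCP `image` content items alongside the JSON text summary, instead of being inlined as `image_data` fields on each result. A rough, hypothetical sketch of the resulting tool response (every value below is invented):

```js
// Hypothetical shape of the search tool response after this change; values are illustrative only.
const exampleToolResponse = {
    content: [
        {
            type: 'text',
            // JSON summary produced from textResults; image hits carry has_image: true
            text: '{"query":"login flow diagram","results_count":1,"results":[{"rank":1,"has_image":true}]}'
        },
        {
            type: 'image',
            data: '<base64-encoded image bytes>',
            mimeType: 'image/png', // from getMimeTypeFromPath(result.document.source)
            annotations: {
                audience: ['user'],
                priority: 0.8,
                title: 'Login flow',
                source: 'docs/login-flow.png'
            }
        }
    ]
};
```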
```diff
@@ -549,7 +575,7 @@ class RagLiteMCPServer {
             if (mode === 'text') {
                 throw new Error('Reranking strategy parameter is only supported in multimodal mode');
             }
-            const validStrategies = ['text-derived', '…
+            const validStrategies = ['text-derived', 'disabled'];
             if (!validStrategies.includes(args.rerank_strategy)) {
                 throw new Error(`Invalid reranking strategy: ${args.rerank_strategy}. Supported strategies: ${validStrategies.join(', ')}`);
             }
```
```diff
@@ -1223,48 +1249,61 @@ class RagLiteMCPServer {
             const startTime = Date.now();
             const results = await this.searchEngine.search(args.query, searchOptions);
             const searchTime = Date.now() - startTime;
-            // Format results for MCP response with …
-            const …
+            // Format results for MCP response with proper image content support
+            const textResults = {
                 query: args.query,
                 content_type_filter: args.content_type || 'all',
                 results_count: results.length,
                 search_time_ms: searchTime,
-                results: …
-                … (removed lines not rendered in the registry diff view)
-                // For image content, include base64-encoded image data for MCP clients
-                if (result.contentType === 'image' && result.document.contentId) {
-                    try {
-                        const imageData = await this.searchEngine.getContent(result.document.contentId, 'base64');
-                        formattedResult.image_data = imageData;
-                        formattedResult.image_format = 'base64';
-                    }
-                    catch (error) {
-                        // If image retrieval fails, include error but don't fail the entire search
-                        formattedResult.image_error = error instanceof Error ? error.message : 'Failed to retrieve image';
-                    }
-                }
-                return formattedResult;
+                results: results.map((result, index) => ({
+                    rank: index + 1,
+                    score: Math.round(result.score * 100) / 100,
+                    content_type: result.contentType,
+                    document: {
+                        id: result.document.id,
+                        title: result.document.title,
+                        source: result.document.source,
+                        content_type: result.document.contentType
+                    },
+                    text: result.content,
+                    metadata: result.metadata,
+                    // Reference to image content if applicable
+                    has_image: result.contentType === 'image' && !!result.document.contentId
                 }))
             };
+            // Build MCP response content array
+            const responseContent = [
+                {
+                    type: 'text',
+                    text: JSON.stringify(textResults, null, 2)
+                }
+            ];
+            // Add proper MCP image content for each image result
+            for (const result of results) {
+                if (result.contentType === 'image' && result.document.contentId) {
+                    try {
+                        const imageData = await this.searchEngine.getContent(result.document.contentId, 'base64');
+                        const mimeType = getMimeTypeFromPath(result.document.source);
+                        responseContent.push({
+                            type: 'image',
+                            data: imageData,
+                            mimeType: mimeType,
+                            annotations: {
+                                audience: ['user'],
+                                priority: 0.8,
+                                title: result.document.title,
+                                source: result.document.source
+                            }
+                        });
+                    }
+                    catch (error) {
+                        // If image retrieval fails, log but don't fail the entire search
+                        console.error(`Failed to retrieve image for ${result.document.source}:`, error);
+                    }
+                }
+            }
             return {
-                content: …
-                    {
-                        type: 'text',
-                        text: JSON.stringify(formattedResults, null, 2),
-                    },
-                ],
+                content: responseContent
             };
         }
         catch (error) {
```
```diff
@@ -1418,23 +1457,6 @@ class RagLiteMCPServer {
                 strategyInfo.accuracy = 'high';
                 strategyInfo.use_cases = ['Mixed content with images', 'Visual documentation', 'Diagrams and charts'];
                 break;
-            case 'metadata':
-                strategyInfo.description = 'Uses file metadata, filenames, and content properties for scoring without model inference';
-                strategyInfo.requirements = ['None - uses file system metadata only'];
-                strategyInfo.supported_content_types = ['text', 'image', 'pdf', 'docx'];
-                strategyInfo.performance_impact = 'low';
-                strategyInfo.accuracy = 'medium';
-                strategyInfo.use_cases = ['Fast retrieval', 'Filename-based search', 'Content type filtering'];
-                break;
-            case 'hybrid':
-                strategyInfo.description = 'Combines multiple reranking signals (semantic + metadata) with configurable weights';
-                strategyInfo.requirements = ['Text-derived reranker', 'Metadata reranker'];
-                strategyInfo.supported_content_types = ['text', 'image', 'pdf', 'docx'];
-                strategyInfo.performance_impact = 'high';
-                strategyInfo.accuracy = 'very high';
-                strategyInfo.use_cases = ['Best overall accuracy', 'Complex multimodal collections', 'Production systems'];
-                strategyInfo.default_weights = { semantic: 0.7, metadata: 0.3 };
-                break;
             case 'disabled':
                 strategyInfo.description = 'No reranking applied - results ordered by vector similarity scores only';
                 strategyInfo.requirements = ['None'];
```
```diff
@@ -1455,8 +1477,8 @@ class RagLiteMCPServer {
             strategies_by_mode: strategiesByMode,
             recommendations: {
                 text_mode: 'Use cross-encoder for best accuracy, disabled for best performance',
-                multimodal_mode: 'Use …
-                development: 'Start with disabled …
+                multimodal_mode: 'Use text-derived for best accuracy, disabled for best performance',
+                development: 'Start with disabled for fast iteration, upgrade to cross-encoder/text-derived for production'
             }
         };
         return {
```
```diff
@@ -1505,16 +1527,16 @@ class RagLiteMCPServer {
         const db = await DatabaseConnectionManager.getConnection(config.db_file);
         try {
             // Get document count by content type
-            const docsByType = await db.all(`
-                SELECT content_type, COUNT(*) as count
-                FROM documents
-                GROUP BY content_type
+            const docsByType = await db.all(`
+                SELECT content_type, COUNT(*) as count
+                FROM documents
+                GROUP BY content_type
             `);
             // Get chunk count by content type
-            const chunksByType = await db.all(`
-                SELECT content_type, COUNT(*) as count
-                FROM chunks
-                GROUP BY content_type
+            const chunksByType = await db.all(`
+                SELECT content_type, COUNT(*) as count
+                FROM chunks
+                GROUP BY content_type
             `);
             enhancedStats.content_breakdown = {
                 documents_by_type: docsByType.reduce((acc, row) => {
```
package/dist/multimodal/clip-embedder.js
CHANGED

```diff
@@ -338,76 +338,73 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
         if (!this.textModel || !this.tokenizer) {
             throw new Error('CLIP text model or tokenizer not initialized');
         }
-        … (removed lines not rendered in the registry diff view)
+        // Use the validated CLIPTextModelWithProjection approach (no pixel_values errors)
+        // Tokenize text with CLIP's requirements
+        // The tokenizer handles truncation at 77 TOKENS (not characters)
+        const tokens = await this.tokenizer(processedText, {
+            padding: true,
+            truncation: true,
+            max_length: 77, // CLIP's text sequence length limit (77 tokens)
+            return_tensors: 'pt'
+        });
+        // Log token information for debugging (only in development)
+        if (process.env.NODE_ENV === 'development') {
+            const tokenIds = tokens.input_ids?.data || [];
+            const actualTokenCount = Array.from(tokenIds).filter((id) => id !== 0).length;
+            if (actualTokenCount >= 77) {
+                console.warn(`Text truncated by tokenizer: "${processedText.substring(0, 50)}..." (truncated to 77 tokens)`);
+            }
+        }
+        // Generate text embedding using CLIPTextModelWithProjection
+        const output = await this.textModel(tokens);
+        // Extract embedding from text_embeds (no pixel_values dependency)
+        const embedding = new Float32Array(output.text_embeds.data);
+        // Validate embedding dimensions and values
+        if (embedding.length !== this.dimensions) {
+            throw new Error(`CLIP embedding dimension mismatch: expected ${this.dimensions}, got ${embedding.length}`);
+        }
+        // Validate that all values are finite numbers
+        const invalidValues = Array.from(embedding).filter(val => !isFinite(val) || isNaN(val));
+        if (invalidValues.length > 0) {
+            throw new Error(`CLIP embedding contains ${invalidValues.length} invalid values`);
+        }
+        // Validate embedding quality - should not be all zeros
+        const nonZeroValues = Array.from(embedding).filter(val => Math.abs(val) > 1e-8);
+        if (nonZeroValues.length === 0) {
+            throw new Error('CLIP embedding is all zeros');
+        }
+        // Calculate embedding magnitude before normalization for quality assessment
+        const magnitudeBeforeNorm = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
+        if (magnitudeBeforeNorm < 1e-6) {
+            throw new Error(`CLIP embedding has critically low magnitude: ${magnitudeBeforeNorm.toExponential(3)}`);
+        }
+        // Apply L2-normalization (CLIP models are trained with normalized embeddings)
+        this.normalizeEmbedding(embedding);
+        // Verify normalization was successful
+        const magnitudeAfterNorm = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
+        if (Math.abs(magnitudeAfterNorm - 1.0) > 0.01) {
+            console.warn(`Warning: Embedding normalization may be imprecise (magnitude: ${magnitudeAfterNorm.toFixed(6)})`);
+        }
+        // Log text embedding generation
+        console.log(`[CLIP] Generated text embedding for: "${processedText.substring(0, 30)}${processedText.length > 30 ? '...' : ''}"`);
+        // Generate unique embedding ID
+        const embeddingId = this.generateEmbeddingId(processedText, 'text');
+        return {
+            embedding_id: embeddingId,
+            vector: embedding,
+            contentType: 'text',
+            metadata: {
+                originalText: text,
+                processedText: processedText,
+                textLength: processedText.length,
+                embeddingMagnitudeBeforeNorm: magnitudeBeforeNorm,
+                embeddingMagnitudeAfterNorm: magnitudeAfterNorm,
+                normalized: true,
+                modelName: this.modelName,
+                modelType: this.modelType,
+                dimensions: this.dimensions
             }
-
-        const embeddingId = this.generateEmbeddingId(processedText, 'text');
-        return {
-            embedding_id: embeddingId,
-            vector: embedding,
-            contentType: 'text',
-            metadata: {
-                originalText: text,
-                processedText: processedText,
-                textLength: processedText.length,
-                embeddingMagnitudeBeforeNorm: magnitudeBeforeNorm,
-                embeddingMagnitudeAfterNorm: magnitudeAfterNorm,
-                normalized: true,
-                modelName: this.modelName,
-                modelType: this.modelType,
-                dimensions: this.dimensions
-            }
-        };
-    }
-    catch (error) {
-        throw error;
-    }
+        };
     }
     // =============================================================================
     // IMAGE EMBEDDING METHODS
```
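The magnitude checks above assume that `normalizeEmbedding` (defined on the base embedder class, not shown in this diff) performs an in-place L2 normalization. A minimal sketch of that operation, for reference only:

```js
// Sketch of in-place L2 normalization matching the before/after magnitude checks
// in the hunk above; the real implementation lives in BaseUniversalEmbedder and may differ.
function l2NormalizeInPlace(vec) {
    let sumSquares = 0;
    for (const v of vec) sumSquares += v * v;
    const magnitude = Math.sqrt(sumSquares);
    if (magnitude === 0) return vec; // the caller already rejects all-zero embeddings
    for (let i = 0; i < vec.length; i++) vec[i] /= magnitude;
    return vec; // afterwards sqrt(sum(v^2)) ≈ 1.0, which the hunk re-verifies
}
```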
```diff
@@ -602,9 +599,11 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
         const absolutePath = path.resolve(imagePath);
         // Try to use Sharp for better Node.js support
         try {
-            const …
+            const sharpModule = await import('sharp');
+            const sharp = sharpModule.default;
+            sharp.concurrency(2);
             // Use Sharp to load and get raw pixel data
-            const { data, info } = await sharp…
+            const { data, info } = await sharp(absolutePath)
                 .resize(variant.imageSize, variant.imageSize, {
                     fit: 'cover',
                     position: 'center'
```
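The registry diff view cuts this hunk off mid-pipeline. For orientation only, a Sharp call of this shape is typically completed along the following lines; this is a sketch under that assumption, not the package's actual code:

```js
// Sketch only: a typical Sharp pipeline yielding { data, info } as destructured above.
// absolutePath and imageSize stand in for the package's own variables.
import sharp from 'sharp';

async function loadRawPixels(absolutePath, imageSize) {
    const { data, info } = await sharp(absolutePath)
        .resize(imageSize, imageSize, { fit: 'cover', position: 'center' })
        .raw()                                  // raw RGB(A) pixels instead of an encoded image
        .toBuffer({ resolveWithObject: true }); // { data: Buffer, info: { width, height, channels, ... } }
    return { data, info };
}
```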
package/package.json
CHANGED