rag-lite-ts 2.0.4 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +815 -808
- package/dist/cli/indexer.js +2 -38
- package/dist/cli/search.d.ts +1 -1
- package/dist/cli/search.js +118 -9
- package/dist/cli.js +77 -94
- package/dist/config.js +3 -0
- package/dist/core/database-connection-manager.js +5 -9
- package/dist/core/db.js +173 -173
- package/dist/core/ingestion.js +50 -9
- package/dist/core/lazy-dependency-loader.d.ts +3 -8
- package/dist/core/lazy-dependency-loader.js +11 -29
- package/dist/core/mode-detection-service.js +1 -1
- package/dist/core/reranking-config.d.ts +1 -1
- package/dist/core/reranking-config.js +7 -16
- package/dist/core/reranking-factory.js +3 -184
- package/dist/core/reranking-strategies.js +5 -4
- package/dist/core/search.d.ts +10 -0
- package/dist/core/search.js +34 -11
- package/dist/factories/ingestion-factory.js +3 -1
- package/dist/mcp-server.js +147 -120
- package/dist/multimodal/clip-embedder.js +70 -71
- package/package.json +105 -105
|
@@ -17,15 +17,13 @@ export const DEFAULT_MULTIMODAL_RERANKING_CONFIG = {
|
|
|
17
17
|
semantic: 0.7,
|
|
18
18
|
metadata: 0.3
|
|
19
19
|
},
|
|
20
|
-
fallback: '
|
|
20
|
+
fallback: 'disabled'
|
|
21
21
|
};
|
|
22
22
|
// Strategy validation without complex interface patterns
|
|
23
23
|
export function validateRerankingStrategy(strategy) {
|
|
24
24
|
const validStrategies = [
|
|
25
25
|
'cross-encoder',
|
|
26
26
|
'text-derived',
|
|
27
|
-
'metadata',
|
|
28
|
-
'hybrid',
|
|
29
27
|
'disabled'
|
|
30
28
|
];
|
|
31
29
|
return validStrategies.includes(strategy);
|
|
@@ -36,7 +34,7 @@ export function validateRerankingConfig(config) {
|
|
|
36
34
|
throw new Error('Reranking strategy is required');
|
|
37
35
|
}
|
|
38
36
|
if (!validateRerankingStrategy(config.strategy)) {
|
|
39
|
-
const validStrategies = ['cross-encoder', 'text-derived', '
|
|
37
|
+
const validStrategies = ['cross-encoder', 'text-derived', 'disabled'];
|
|
40
38
|
throw new Error(`Invalid reranking strategy '${config.strategy}'. ` +
|
|
41
39
|
`Valid strategies: ${validStrategies.join(', ')}`);
|
|
42
40
|
}
|
|
@@ -52,23 +50,16 @@ export function validateRerankingConfig(config) {
|
|
|
52
50
|
if (visual !== undefined && (visual < 0 || visual > 1)) {
|
|
53
51
|
throw new Error('Visual weight must be between 0 and 1');
|
|
54
52
|
}
|
|
55
|
-
// Ensure weights sum to reasonable value for hybrid strategy
|
|
56
|
-
if (config.strategy === 'hybrid') {
|
|
57
|
-
const totalWeight = (semantic || 0) + (metadata || 0) + (visual || 0);
|
|
58
|
-
if (totalWeight === 0) {
|
|
59
|
-
throw new Error('Hybrid strategy requires at least one weight to be greater than 0');
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
53
|
}
|
|
63
54
|
// Validate fallback strategy if provided
|
|
64
55
|
if (config.fallback && !validateRerankingStrategy(config.fallback)) {
|
|
65
|
-
const validStrategies = ['cross-encoder', 'text-derived', '
|
|
56
|
+
const validStrategies = ['cross-encoder', 'text-derived', 'disabled'];
|
|
66
57
|
throw new Error(`Invalid fallback strategy '${config.fallback}'. ` +
|
|
67
58
|
`Valid strategies: ${validStrategies.join(', ')}`);
|
|
68
59
|
}
|
|
69
60
|
return {
|
|
70
61
|
strategy: config.strategy,
|
|
71
|
-
enabled: config.enabled ?? true,
|
|
62
|
+
enabled: config.strategy === 'disabled' ? false : (config.enabled ?? true),
|
|
72
63
|
model: config.model,
|
|
73
64
|
weights: config.weights,
|
|
74
65
|
fallback: config.fallback || 'disabled'
|
|
@@ -91,7 +82,7 @@ export function isStrategySupported(strategy, mode) {
|
|
|
91
82
|
case 'text':
|
|
92
83
|
return strategy === 'cross-encoder' || strategy === 'disabled';
|
|
93
84
|
case 'multimodal':
|
|
94
|
-
return ['text-derived', '
|
|
85
|
+
return ['text-derived', 'disabled'].includes(strategy);
|
|
95
86
|
default:
|
|
96
87
|
return false;
|
|
97
88
|
}
|
|
@@ -102,7 +93,7 @@ export function getSupportedStrategies(mode) {
|
|
|
102
93
|
case 'text':
|
|
103
94
|
return ['cross-encoder', 'disabled'];
|
|
104
95
|
case 'multimodal':
|
|
105
|
-
return ['text-derived', '
|
|
96
|
+
return ['text-derived', 'disabled'];
|
|
106
97
|
default:
|
|
107
98
|
return ['disabled'];
|
|
108
99
|
}
|
|
@@ -145,7 +136,7 @@ export class RerankingConfigBuilder {
|
|
|
145
136
|
.strategy('text-derived')
|
|
146
137
|
.enabled(true)
|
|
147
138
|
.weights({ semantic: 0.7, metadata: 0.3 })
|
|
148
|
-
.fallback('
|
|
139
|
+
.fallback('disabled');
|
|
149
140
|
}
|
|
150
141
|
static disabled() {
|
|
151
142
|
return new RerankingConfigBuilder()
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* principle of using simple functions over complex factory patterns.
|
|
7
7
|
*/
|
|
8
8
|
import { getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, validateRerankingConfig } from './reranking-config.js';
|
|
9
|
-
import { createCrossEncoderRerankFunction, createTextDerivedRerankFunction
|
|
9
|
+
import { createCrossEncoderRerankFunction, createTextDerivedRerankFunction } from './reranking-strategies.js';
|
|
10
10
|
/**
|
|
11
11
|
* Simple reranking creation function with conditional logic
|
|
12
12
|
*
|
|
@@ -102,23 +102,6 @@ function createRerankingFunction(mode, strategy, config) {
|
|
|
102
102
|
undefined // Use default cross-encoder model
|
|
103
103
|
);
|
|
104
104
|
break;
|
|
105
|
-
case 'metadata':
|
|
106
|
-
console.log(`Creating metadata reranker for ${mode} mode`);
|
|
107
|
-
reranker = createMetadataRerankFunction({
|
|
108
|
-
weights: config.weights ? {
|
|
109
|
-
filename: config.weights.metadata || 0.4,
|
|
110
|
-
contentType: 0.3,
|
|
111
|
-
metadata: config.weights.metadata || 0.3
|
|
112
|
-
} : undefined
|
|
113
|
-
});
|
|
114
|
-
break;
|
|
115
|
-
case 'hybrid':
|
|
116
|
-
if (mode !== 'multimodal') {
|
|
117
|
-
throw new RerankingStrategyError(strategy, mode, 'Hybrid strategy only supported in multimodal mode', 'UNSUPPORTED_MODE');
|
|
118
|
-
}
|
|
119
|
-
console.log('Creating hybrid reranker for multimodal mode');
|
|
120
|
-
reranker = createHybridRerankFunction(config);
|
|
121
|
-
break;
|
|
122
105
|
case 'disabled':
|
|
123
106
|
console.log('Reranking explicitly disabled');
|
|
124
107
|
return undefined;
|
|
@@ -241,172 +224,10 @@ function wrapRerankFunctionWithErrorRecovery(reranker, strategy, mode) {
|
|
|
241
224
|
};
|
|
242
225
|
}
|
|
243
226
|
/**
|
|
244
|
-
*
|
|
227
|
+
* Hybrid reranking strategy removed in Phase 3 - throwing error for backward compatibility
|
|
245
228
|
*/
|
|
246
229
|
function createHybridRerankFunction(config) {
|
|
247
|
-
|
|
248
|
-
const weights = config.weights || {
|
|
249
|
-
semantic: 0.6,
|
|
250
|
-
metadata: 0.4,
|
|
251
|
-
visual: 0.0 // Not implemented yet
|
|
252
|
-
};
|
|
253
|
-
// Track which strategies are available
|
|
254
|
-
const availableStrategies = {};
|
|
255
|
-
// Initialize strategies with error handling
|
|
256
|
-
try {
|
|
257
|
-
if (weights.semantic && weights.semantic > 0) {
|
|
258
|
-
availableStrategies.textDerived = createTextDerivedRerankFunction();
|
|
259
|
-
console.log('✅ Text-derived strategy initialized for hybrid reranking');
|
|
260
|
-
}
|
|
261
|
-
}
|
|
262
|
-
catch (error) {
|
|
263
|
-
console.warn(`⚠️ Text-derived strategy initialization failed for hybrid reranking: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
264
|
-
}
|
|
265
|
-
try {
|
|
266
|
-
if (weights.metadata && weights.metadata > 0) {
|
|
267
|
-
availableStrategies.metadata = createMetadataRerankFunction();
|
|
268
|
-
console.log('✅ Metadata strategy initialized for hybrid reranking');
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
catch (error) {
|
|
272
|
-
console.warn(`⚠️ Metadata strategy initialization failed for hybrid reranking: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
273
|
-
}
|
|
274
|
-
// Check if any strategies are available
|
|
275
|
-
const hasAvailableStrategies = Object.keys(availableStrategies).length > 0;
|
|
276
|
-
if (!hasAvailableStrategies) {
|
|
277
|
-
throw new RerankingStrategyError('hybrid', 'multimodal', 'No hybrid reranking strategies could be initialized', 'NO_STRATEGIES_AVAILABLE');
|
|
278
|
-
}
|
|
279
|
-
console.log(`Hybrid reranking initialized with ${Object.keys(availableStrategies).length} available strategies`);
|
|
280
|
-
return async (query, results, contentType) => {
|
|
281
|
-
const startTime = Date.now();
|
|
282
|
-
const strategyResults = {};
|
|
283
|
-
try {
|
|
284
|
-
console.log(`🔄 Running hybrid reranking with ${Object.keys(availableStrategies).length} strategies`);
|
|
285
|
-
// Start with original results
|
|
286
|
-
let hybridResults = [...results];
|
|
287
|
-
let successfulStrategies = 0;
|
|
288
|
-
// Apply text-derived reranking if available and enabled
|
|
289
|
-
if (availableStrategies.textDerived && weights.semantic && weights.semantic > 0) {
|
|
290
|
-
const strategyStartTime = Date.now();
|
|
291
|
-
try {
|
|
292
|
-
console.log(`🔧 Applying text-derived reranking (weight: ${weights.semantic})`);
|
|
293
|
-
const textDerivedResults = await availableStrategies.textDerived(query, hybridResults, contentType);
|
|
294
|
-
// Combine scores with semantic weight
|
|
295
|
-
hybridResults = hybridResults.map((result, index) => {
|
|
296
|
-
const textDerivedScore = textDerivedResults[index]?.score || result.score;
|
|
297
|
-
const combinedScore = result.score * (1 - weights.semantic) + textDerivedScore * weights.semantic;
|
|
298
|
-
return {
|
|
299
|
-
...result,
|
|
300
|
-
score: combinedScore,
|
|
301
|
-
metadata: {
|
|
302
|
-
...result.metadata,
|
|
303
|
-
hybridScores: {
|
|
304
|
-
...(result.metadata?.hybridScores || {}),
|
|
305
|
-
textDerived: textDerivedScore,
|
|
306
|
-
semantic: combinedScore
|
|
307
|
-
}
|
|
308
|
-
}
|
|
309
|
-
};
|
|
310
|
-
});
|
|
311
|
-
const strategyDuration = Date.now() - strategyStartTime;
|
|
312
|
-
strategyResults.textDerived = { success: true, duration: strategyDuration };
|
|
313
|
-
successfulStrategies++;
|
|
314
|
-
console.log(`✅ Text-derived reranking completed (${strategyDuration}ms)`);
|
|
315
|
-
}
|
|
316
|
-
catch (error) {
|
|
317
|
-
const strategyDuration = Date.now() - strategyStartTime;
|
|
318
|
-
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
|
319
|
-
strategyResults.textDerived = { success: false, error: errorMessage, duration: strategyDuration };
|
|
320
|
-
console.warn(`❌ Text-derived reranking failed in hybrid mode (${strategyDuration}ms): ${errorMessage}`);
|
|
321
|
-
}
|
|
322
|
-
}
|
|
323
|
-
// Apply metadata reranking if available and enabled
|
|
324
|
-
if (availableStrategies.metadata && weights.metadata && weights.metadata > 0) {
|
|
325
|
-
const strategyStartTime = Date.now();
|
|
326
|
-
try {
|
|
327
|
-
console.log(`🔧 Applying metadata reranking (weight: ${weights.metadata})`);
|
|
328
|
-
const metadataResults = await availableStrategies.metadata(query, hybridResults, contentType);
|
|
329
|
-
// Combine scores with metadata weight
|
|
330
|
-
hybridResults = hybridResults.map((result, index) => {
|
|
331
|
-
const metadataScore = metadataResults[index]?.score || result.score;
|
|
332
|
-
const currentScore = result.score;
|
|
333
|
-
const combinedScore = currentScore * (1 - weights.metadata) + metadataScore * weights.metadata;
|
|
334
|
-
return {
|
|
335
|
-
...result,
|
|
336
|
-
score: combinedScore,
|
|
337
|
-
metadata: {
|
|
338
|
-
...result.metadata,
|
|
339
|
-
hybridScores: {
|
|
340
|
-
...(result.metadata?.hybridScores || {}),
|
|
341
|
-
metadata: metadataScore,
|
|
342
|
-
combined: combinedScore
|
|
343
|
-
}
|
|
344
|
-
}
|
|
345
|
-
};
|
|
346
|
-
});
|
|
347
|
-
const strategyDuration = Date.now() - strategyStartTime;
|
|
348
|
-
strategyResults.metadata = { success: true, duration: strategyDuration };
|
|
349
|
-
successfulStrategies++;
|
|
350
|
-
console.log(`✅ Metadata reranking completed (${strategyDuration}ms)`);
|
|
351
|
-
}
|
|
352
|
-
catch (error) {
|
|
353
|
-
const strategyDuration = Date.now() - strategyStartTime;
|
|
354
|
-
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
|
355
|
-
strategyResults.metadata = { success: false, error: errorMessage, duration: strategyDuration };
|
|
356
|
-
console.warn(`❌ Metadata reranking failed in hybrid mode (${strategyDuration}ms): ${errorMessage}`);
|
|
357
|
-
}
|
|
358
|
-
}
|
|
359
|
-
// Sort by final combined scores
|
|
360
|
-
hybridResults.sort((a, b) => b.score - a.score);
|
|
361
|
-
const totalDuration = Date.now() - startTime;
|
|
362
|
-
// Add hybrid reranking metadata to results
|
|
363
|
-
hybridResults = hybridResults.map(result => ({
|
|
364
|
-
...result,
|
|
365
|
-
metadata: {
|
|
366
|
-
...result.metadata,
|
|
367
|
-
hybridRerankingInfo: {
|
|
368
|
-
totalDuration,
|
|
369
|
-
successfulStrategies,
|
|
370
|
-
strategyResults,
|
|
371
|
-
weights
|
|
372
|
-
}
|
|
373
|
-
}
|
|
374
|
-
}));
|
|
375
|
-
if (successfulStrategies > 0) {
|
|
376
|
-
console.log(`✅ Hybrid reranking completed successfully (${totalDuration}ms, ${successfulStrategies}/${Object.keys(availableStrategies).length} strategies succeeded)`);
|
|
377
|
-
}
|
|
378
|
-
else {
|
|
379
|
-
console.warn(`⚠️ Hybrid reranking completed with no successful strategies (${totalDuration}ms), returning original results`);
|
|
380
|
-
return results; // Return original results if no strategies succeeded
|
|
381
|
-
}
|
|
382
|
-
return hybridResults;
|
|
383
|
-
}
|
|
384
|
-
catch (error) {
|
|
385
|
-
const totalDuration = Date.now() - startTime;
|
|
386
|
-
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
|
387
|
-
console.warn(`❌ Hybrid reranking failed (${totalDuration}ms): ${errorMessage}. ` +
|
|
388
|
-
`Returning original results.`);
|
|
389
|
-
// Log detailed error information
|
|
390
|
-
console.error('Hybrid reranking error details:', {
|
|
391
|
-
query: query.substring(0, 100) + (query.length > 100 ? '...' : ''),
|
|
392
|
-
resultCount: results.length,
|
|
393
|
-
contentType,
|
|
394
|
-
availableStrategies: Object.keys(availableStrategies),
|
|
395
|
-
weights,
|
|
396
|
-
strategyResults,
|
|
397
|
-
error: errorMessage
|
|
398
|
-
});
|
|
399
|
-
return results.map(result => ({
|
|
400
|
-
...result,
|
|
401
|
-
metadata: {
|
|
402
|
-
...result.metadata,
|
|
403
|
-
hybridRerankingFailed: true,
|
|
404
|
-
hybridRerankingError: errorMessage,
|
|
405
|
-
fallbackToVectorSimilarity: true
|
|
406
|
-
}
|
|
407
|
-
}));
|
|
408
|
-
}
|
|
409
|
-
};
|
|
230
|
+
throw new RerankingStrategyError('hybrid', 'multimodal', 'Hybrid reranking strategy has been removed in this version. Use text-derived instead.', 'STRATEGY_REMOVED');
|
|
410
231
|
}
|
|
411
232
|
/**
|
|
412
233
|
* Create reranker with automatic mode detection
|
|
@@ -582,8 +403,6 @@ export function getRerankingStats() {
|
|
|
582
403
|
strategiesUsed: {
|
|
583
404
|
'cross-encoder': 0,
|
|
584
405
|
'text-derived': 0,
|
|
585
|
-
'metadata': 0,
|
|
586
|
-
'hybrid': 0,
|
|
587
406
|
'disabled': 0
|
|
588
407
|
}
|
|
589
408
|
};
|
|
@@ -194,7 +194,7 @@ export class TextDerivedRerankingStrategy {
|
|
|
194
194
|
catch (error) {
|
|
195
195
|
console.warn(`Failed to generate description for image ${imagePath}: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
196
196
|
// Fallback to filename-based description
|
|
197
|
-
const filename = imagePath.split('/').pop() || imagePath;
|
|
197
|
+
const filename = imagePath.split('/').pop() || imagePath.split('\\').pop() || imagePath;
|
|
198
198
|
return `Image file: ${filename}`;
|
|
199
199
|
}
|
|
200
200
|
}
|
|
@@ -211,16 +211,17 @@ export class TextDerivedRerankingStrategy {
|
|
|
211
211
|
// Step 1: Convert images to text descriptions
|
|
212
212
|
const processedResults = await Promise.all(results.map(async (result) => {
|
|
213
213
|
if (result.contentType === 'image') {
|
|
214
|
-
// Generate text description for image
|
|
215
|
-
const description = await this.generateImageDescription(result.
|
|
214
|
+
// Generate text description for image using the file path from document.source
|
|
215
|
+
const description = await this.generateImageDescription(result.document.source);
|
|
216
216
|
return {
|
|
217
217
|
...result,
|
|
218
218
|
content: description,
|
|
219
|
+
contentType: 'text', // Change to 'text' so cross-encoder will process it
|
|
219
220
|
originalContent: result.content,
|
|
220
221
|
originalContentType: result.contentType,
|
|
221
222
|
metadata: {
|
|
222
223
|
...result.metadata,
|
|
223
|
-
originalImagePath: result.
|
|
224
|
+
originalImagePath: result.document.source,
|
|
224
225
|
generatedDescription: description
|
|
225
226
|
}
|
|
226
227
|
};
|
package/dist/core/search.d.ts
CHANGED
|
@@ -80,6 +80,16 @@ export declare class SearchEngine {
|
|
|
80
80
|
* @returns Promise resolving to array of search results
|
|
81
81
|
*/
|
|
82
82
|
search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
|
|
83
|
+
/**
|
|
84
|
+
* Perform semantic search using a pre-computed embedding vector
|
|
85
|
+
* Useful for image-based search or when embedding is computed externally
|
|
86
|
+
* @param queryVector - Pre-computed query embedding vector
|
|
87
|
+
* @param options - Search options including top_k and rerank settings
|
|
88
|
+
* @param originalQuery - Optional original query for reranking (text or image path)
|
|
89
|
+
* @param embeddingTime - Optional embedding time for logging
|
|
90
|
+
* @returns Promise resolving to array of search results
|
|
91
|
+
*/
|
|
92
|
+
searchWithVector(queryVector: Float32Array, options?: SearchOptions, originalQuery?: string, embeddingTime?: number): Promise<SearchResult[]>;
|
|
83
93
|
/**
|
|
84
94
|
* Format search results with proper structure
|
|
85
95
|
* @param chunks - Database chunks with metadata
|
package/dist/core/search.js
CHANGED
|
@@ -106,18 +106,40 @@ export class SearchEngine {
|
|
|
106
106
|
return [];
|
|
107
107
|
}
|
|
108
108
|
const startTime = performance.now();
|
|
109
|
-
const topK = options.top_k || config.top_k || 10;
|
|
110
|
-
const shouldRerank = options.rerank !== undefined ? options.rerank : (this.rerankFn !== undefined);
|
|
111
109
|
try {
|
|
112
110
|
// Step 1: Build query embedding using injected embed function
|
|
113
111
|
const embeddingStartTime = performance.now();
|
|
114
112
|
const queryEmbedding = await this.embedFn(query);
|
|
115
113
|
const embeddingTime = performance.now() - embeddingStartTime;
|
|
116
|
-
// Step 2: Search
|
|
114
|
+
// Step 2: Search with the vector
|
|
115
|
+
const results = await this.searchWithVector(queryEmbedding.vector, options, query, embeddingTime);
|
|
116
|
+
return results;
|
|
117
|
+
}
|
|
118
|
+
catch (error) {
|
|
119
|
+
throw new Error(`Search failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
/**
|
|
123
|
+
* Perform semantic search using a pre-computed embedding vector
|
|
124
|
+
* Useful for image-based search or when embedding is computed externally
|
|
125
|
+
* @param queryVector - Pre-computed query embedding vector
|
|
126
|
+
* @param options - Search options including top_k and rerank settings
|
|
127
|
+
* @param originalQuery - Optional original query for reranking (text or image path)
|
|
128
|
+
* @param embeddingTime - Optional embedding time for logging
|
|
129
|
+
* @returns Promise resolving to array of search results
|
|
130
|
+
*/
|
|
131
|
+
async searchWithVector(queryVector, options = {}, originalQuery, embeddingTime) {
|
|
132
|
+
const startTime = performance.now();
|
|
133
|
+
const topK = options.top_k || config.top_k || 10;
|
|
134
|
+
// Phase 1: Disable reranking by default for better performance
|
|
135
|
+
// Users must explicitly opt-in with --rerank flag
|
|
136
|
+
const shouldRerank = options.rerank === true;
|
|
137
|
+
try {
|
|
138
|
+
// Step 1: Search using IndexManager (which handles hash mapping properly)
|
|
117
139
|
const searchStartTime = performance.now();
|
|
118
140
|
let searchResult;
|
|
119
141
|
try {
|
|
120
|
-
searchResult = this.indexManager.search(
|
|
142
|
+
searchResult = this.indexManager.search(queryVector, topK);
|
|
121
143
|
}
|
|
122
144
|
catch (error) {
|
|
123
145
|
if (error instanceof Error && error.message.includes('No embedding ID found for hash')) {
|
|
@@ -133,18 +155,18 @@ export class SearchEngine {
|
|
|
133
155
|
console.log(`No similar documents found (${totalTime.toFixed(2)}ms total)`);
|
|
134
156
|
return [];
|
|
135
157
|
}
|
|
136
|
-
// Step
|
|
158
|
+
// Step 2: Retrieve chunks from database using embedding IDs
|
|
137
159
|
const retrievalStartTime = performance.now();
|
|
138
160
|
const chunks = await getChunksByEmbeddingIds(this.db, searchResult.embeddingIds);
|
|
139
161
|
const retrievalTime = performance.now() - retrievalStartTime;
|
|
140
|
-
// Step
|
|
162
|
+
// Step 3: Format results as JSON with text, score, and document metadata
|
|
141
163
|
let results = this.formatSearchResults(chunks, searchResult.distances, searchResult.embeddingIds);
|
|
142
|
-
// Step
|
|
164
|
+
// Step 4: Optional reranking with injected rerank function
|
|
143
165
|
let rerankTime = 0;
|
|
144
|
-
if (shouldRerank && this.rerankFn && results.length > 1) {
|
|
166
|
+
if (shouldRerank && this.rerankFn && results.length > 1 && originalQuery) {
|
|
145
167
|
try {
|
|
146
168
|
const rerankStartTime = performance.now();
|
|
147
|
-
results = await this.rerankFn(
|
|
169
|
+
results = await this.rerankFn(originalQuery, results);
|
|
148
170
|
rerankTime = performance.now() - rerankStartTime;
|
|
149
171
|
}
|
|
150
172
|
catch (error) {
|
|
@@ -154,13 +176,14 @@ export class SearchEngine {
|
|
|
154
176
|
}
|
|
155
177
|
const totalTime = performance.now() - startTime;
|
|
156
178
|
// Measure latency without premature optimization - just log for monitoring
|
|
179
|
+
const embedTimeStr = embeddingTime !== undefined ? `embed: ${embeddingTime.toFixed(2)}ms, ` : '';
|
|
157
180
|
console.log(`Search completed: ${results.length} results in ${totalTime.toFixed(2)}ms ` +
|
|
158
|
-
`(
|
|
181
|
+
`(${embedTimeStr}vector: ${vectorSearchTime.toFixed(2)}ms, ` +
|
|
159
182
|
`retrieval: ${retrievalTime.toFixed(2)}ms${rerankTime > 0 ? `, rerank: ${rerankTime.toFixed(2)}ms` : ''})`);
|
|
160
183
|
return results;
|
|
161
184
|
}
|
|
162
185
|
catch (error) {
|
|
163
|
-
throw new Error(`
|
|
186
|
+
throw new Error(`Vector search failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
164
187
|
}
|
|
165
188
|
}
|
|
166
189
|
/**
|
|
@@ -323,7 +323,9 @@ export class IngestionFactory {
|
|
|
323
323
|
const { getSystemInfo, setSystemInfo } = await import('../core/db.js');
|
|
324
324
|
// Determine the effective mode and reranking strategy
|
|
325
325
|
const effectiveMode = options.mode || 'text';
|
|
326
|
-
|
|
326
|
+
// Phase 1: Fix mode-specific reranking strategy defaults
|
|
327
|
+
const effectiveRerankingStrategy = options.rerankingStrategy ||
|
|
328
|
+
(effectiveMode === 'multimodal' ? 'text-derived' : 'cross-encoder');
|
|
327
329
|
// Determine model type based on model name
|
|
328
330
|
let modelType;
|
|
329
331
|
if (effectiveModel.includes('clip')) {
|