rag-lite-ts 2.0.5 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +815 -808
- package/dist/cli/indexer.js +3 -39
- package/dist/cli/search.d.ts +1 -1
- package/dist/cli/search.js +123 -19
- package/dist/cli.js +77 -94
- package/dist/core/binary-index-format.d.ts +28 -2
- package/dist/core/binary-index-format.js +196 -27
- package/dist/core/db.js +173 -173
- package/dist/core/ingestion.d.ts +5 -1
- package/dist/core/ingestion.js +123 -18
- package/dist/core/lazy-dependency-loader.d.ts +3 -8
- package/dist/core/lazy-dependency-loader.js +11 -29
- package/dist/core/mode-detection-service.js +1 -1
- package/dist/core/reranking-config.d.ts +1 -1
- package/dist/core/reranking-config.js +7 -16
- package/dist/core/reranking-factory.js +3 -184
- package/dist/core/search.d.ts +10 -0
- package/dist/core/search.js +35 -11
- package/dist/core/types.d.ts +1 -1
- package/dist/core/vector-index.d.ts +4 -0
- package/dist/core/vector-index.js +6 -0
- package/dist/factories/ingestion-factory.js +3 -1
- package/dist/file-processor.d.ts +2 -0
- package/dist/file-processor.js +20 -0
- package/dist/index-manager.d.ts +17 -1
- package/dist/index-manager.js +148 -7
- package/dist/mcp-server.js +127 -105
- package/dist/multimodal/clip-embedder.js +6 -2
- package/package.json +1 -1
package/dist/core/search.js
CHANGED
|
@@ -106,18 +106,41 @@ export class SearchEngine {
|
|
|
106
106
|
return [];
|
|
107
107
|
}
|
|
108
108
|
const startTime = performance.now();
|
|
109
|
-
const topK = options.top_k || config.top_k || 10;
|
|
110
|
-
const shouldRerank = options.rerank !== undefined ? options.rerank : (this.rerankFn !== undefined);
|
|
111
109
|
try {
|
|
112
110
|
// Step 1: Build query embedding using injected embed function
|
|
113
111
|
const embeddingStartTime = performance.now();
|
|
114
112
|
const queryEmbedding = await this.embedFn(query);
|
|
115
113
|
const embeddingTime = performance.now() - embeddingStartTime;
|
|
116
|
-
// Step 2: Search
|
|
114
|
+
// Step 2: Search with the vector
|
|
115
|
+
const results = await this.searchWithVector(queryEmbedding.vector, options, query, embeddingTime);
|
|
116
|
+
return results;
|
|
117
|
+
}
|
|
118
|
+
catch (error) {
|
|
119
|
+
throw new Error(`Search failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
/**
|
|
123
|
+
* Perform semantic search using a pre-computed embedding vector
|
|
124
|
+
* Useful for image-based search or when embedding is computed externally
|
|
125
|
+
* @param queryVector - Pre-computed query embedding vector
|
|
126
|
+
* @param options - Search options including top_k and rerank settings
|
|
127
|
+
* @param originalQuery - Optional original query for reranking (text or image path)
|
|
128
|
+
* @param embeddingTime - Optional embedding time for logging
|
|
129
|
+
* @returns Promise resolving to array of search results
|
|
130
|
+
*/
|
|
131
|
+
async searchWithVector(queryVector, options = {}, originalQuery, embeddingTime) {
|
|
132
|
+
const startTime = performance.now();
|
|
133
|
+
const topK = options.top_k || config.top_k || 10;
|
|
134
|
+
// Phase 1: Disable reranking by default for better performance
|
|
135
|
+
// Users must explicitly opt-in with --rerank flag
|
|
136
|
+
const shouldRerank = options.rerank === true;
|
|
137
|
+
try {
|
|
138
|
+
// Step 1: Search using IndexManager (which handles hash mapping properly)
|
|
117
139
|
const searchStartTime = performance.now();
|
|
118
140
|
let searchResult;
|
|
119
141
|
try {
|
|
120
|
-
|
|
142
|
+
const contentType = options.contentType;
|
|
143
|
+
searchResult = this.indexManager.search(queryVector, topK, contentType);
|
|
121
144
|
}
|
|
122
145
|
catch (error) {
|
|
123
146
|
if (error instanceof Error && error.message.includes('No embedding ID found for hash')) {
|
|
@@ -133,18 +156,18 @@ export class SearchEngine {
|
|
|
133
156
|
console.log(`No similar documents found (${totalTime.toFixed(2)}ms total)`);
|
|
134
157
|
return [];
|
|
135
158
|
}
|
|
136
|
-
// Step
|
|
159
|
+
// Step 2: Retrieve chunks from database using embedding IDs
|
|
137
160
|
const retrievalStartTime = performance.now();
|
|
138
161
|
const chunks = await getChunksByEmbeddingIds(this.db, searchResult.embeddingIds);
|
|
139
162
|
const retrievalTime = performance.now() - retrievalStartTime;
|
|
140
|
-
// Step
|
|
163
|
+
// Step 3: Format results as JSON with text, score, and document metadata
|
|
141
164
|
let results = this.formatSearchResults(chunks, searchResult.distances, searchResult.embeddingIds);
|
|
142
|
-
// Step
|
|
165
|
+
// Step 4: Optional reranking with injected rerank function
|
|
143
166
|
let rerankTime = 0;
|
|
144
|
-
if (shouldRerank && this.rerankFn && results.length > 1) {
|
|
167
|
+
if (shouldRerank && this.rerankFn && results.length > 1 && originalQuery) {
|
|
145
168
|
try {
|
|
146
169
|
const rerankStartTime = performance.now();
|
|
147
|
-
results = await this.rerankFn(
|
|
170
|
+
results = await this.rerankFn(originalQuery, results);
|
|
148
171
|
rerankTime = performance.now() - rerankStartTime;
|
|
149
172
|
}
|
|
150
173
|
catch (error) {
|
|
@@ -154,13 +177,14 @@ export class SearchEngine {
|
|
|
154
177
|
}
|
|
155
178
|
const totalTime = performance.now() - startTime;
|
|
156
179
|
// Measure latency without premature optimization - just log for monitoring
|
|
180
|
+
const embedTimeStr = embeddingTime !== undefined ? `embed: ${embeddingTime.toFixed(2)}ms, ` : '';
|
|
157
181
|
console.log(`Search completed: ${results.length} results in ${totalTime.toFixed(2)}ms ` +
|
|
158
|
-
`(
|
|
182
|
+
`(${embedTimeStr}vector: ${vectorSearchTime.toFixed(2)}ms, ` +
|
|
159
183
|
`retrieval: ${retrievalTime.toFixed(2)}ms${rerankTime > 0 ? `, rerank: ${rerankTime.toFixed(2)}ms` : ''})`);
|
|
160
184
|
return results;
|
|
161
185
|
}
|
|
162
186
|
catch (error) {
|
|
163
|
-
throw new Error(`
|
|
187
|
+
throw new Error(`Vector search failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
164
188
|
}
|
|
165
189
|
}
|
|
166
190
|
/**
|
package/dist/core/types.d.ts
CHANGED
|
@@ -64,5 +64,9 @@ export declare class VectorIndex {
|
|
|
64
64
|
* Resize index to accommodate more vectors
|
|
65
65
|
*/
|
|
66
66
|
resizeIndex(newMaxElements: number): void;
|
|
67
|
+
/**
|
|
68
|
+
* Get index options (for external access to configuration)
|
|
69
|
+
*/
|
|
70
|
+
getOptions(): VectorIndexOptions;
|
|
67
71
|
}
|
|
68
72
|
//# sourceMappingURL=vector-index.d.ts.map
|
|
@@ -321,5 +321,11 @@ export class VectorIndex {
|
|
|
321
321
|
throw new Error(`Failed to resize index: ${error}`);
|
|
322
322
|
}
|
|
323
323
|
}
|
|
324
|
+
/**
|
|
325
|
+
* Get index options (for external access to configuration)
|
|
326
|
+
*/
|
|
327
|
+
getOptions() {
|
|
328
|
+
return { ...this.options };
|
|
329
|
+
}
|
|
324
330
|
}
|
|
325
331
|
//# sourceMappingURL=vector-index.js.map
|
|
@@ -323,7 +323,9 @@ export class IngestionFactory {
|
|
|
323
323
|
const { getSystemInfo, setSystemInfo } = await import('../core/db.js');
|
|
324
324
|
// Determine the effective mode and reranking strategy
|
|
325
325
|
const effectiveMode = options.mode || 'text';
|
|
326
|
-
|
|
326
|
+
// Phase 1: Fix mode-specific reranking strategy defaults
|
|
327
|
+
const effectiveRerankingStrategy = options.rerankingStrategy ||
|
|
328
|
+
(effectiveMode === 'multimodal' ? 'text-derived' : 'cross-encoder');
|
|
327
329
|
// Determine model type based on model name
|
|
328
330
|
let modelType;
|
|
329
331
|
if (effectiveModel.includes('clip')) {
|
package/dist/file-processor.d.ts
CHANGED
|
@@ -8,6 +8,8 @@ export interface FileProcessorOptions {
|
|
|
8
8
|
recursive?: boolean;
|
|
9
9
|
/** Maximum file size in bytes (default: 10MB) */
|
|
10
10
|
maxFileSize?: number;
|
|
11
|
+
/** Processing mode to filter compatible files */
|
|
12
|
+
mode?: 'text' | 'multimodal';
|
|
11
13
|
}
|
|
12
14
|
/**
|
|
13
15
|
* Default options for file processing
|
package/dist/file-processor.js
CHANGED
|
@@ -188,6 +188,15 @@ async function discoverFilesRecursive(dirPath, options) {
|
|
|
188
188
|
// Check file size based on content type
|
|
189
189
|
const stats = await fs.stat(fullPath);
|
|
190
190
|
const contentType = getContentType(fullPath);
|
|
191
|
+
// Filter by mode: skip incompatible content types
|
|
192
|
+
const mode = options.mode || 'text';
|
|
193
|
+
if (mode === 'text' && contentType === 'image') {
|
|
194
|
+
result.skipped.push({
|
|
195
|
+
path: fullPath,
|
|
196
|
+
reason: `Image files not supported in text mode. Use --mode multimodal for image processing.`
|
|
197
|
+
});
|
|
198
|
+
continue;
|
|
199
|
+
}
|
|
191
200
|
// Different size limits for different content types
|
|
192
201
|
const maxSize = contentType === 'image'
|
|
193
202
|
? 50 * 1024 * 1024 // 50MB for images
|
|
@@ -250,6 +259,17 @@ export async function discoverFiles(path, options = DEFAULT_FILE_PROCESSOR_OPTIO
|
|
|
250
259
|
};
|
|
251
260
|
}
|
|
252
261
|
const contentType = getContentType(resolvedPath);
|
|
262
|
+
// Filter by mode: skip incompatible content types
|
|
263
|
+
const mode = options.mode || 'text';
|
|
264
|
+
if (mode === 'text' && contentType === 'image') {
|
|
265
|
+
return {
|
|
266
|
+
files: [],
|
|
267
|
+
skipped: [{
|
|
268
|
+
path: resolvedPath,
|
|
269
|
+
reason: `Image files not supported in text mode. Use --mode multimodal for image processing.`
|
|
270
|
+
}]
|
|
271
|
+
};
|
|
272
|
+
}
|
|
253
273
|
// Check file size based on content type
|
|
254
274
|
const maxSize = contentType === 'image'
|
|
255
275
|
? 50 * 1024 * 1024 // 50MB for images
|
package/dist/index-manager.d.ts
CHANGED
|
@@ -7,12 +7,16 @@ export interface IndexStats {
|
|
|
7
7
|
export declare class IndexManager {
|
|
8
8
|
private modelName?;
|
|
9
9
|
private vectorIndex;
|
|
10
|
+
private textIndex?;
|
|
11
|
+
private imageIndex?;
|
|
10
12
|
private db;
|
|
11
13
|
private indexPath;
|
|
12
14
|
private dbPath;
|
|
13
15
|
private isInitialized;
|
|
14
16
|
private hashToEmbeddingId;
|
|
15
17
|
private embeddingIdToHash;
|
|
18
|
+
private groupedEmbeddings?;
|
|
19
|
+
private vectorIndexOptions;
|
|
16
20
|
constructor(indexPath: string, dbPath: string, dimensions: number, modelName?: string | undefined);
|
|
17
21
|
/**
|
|
18
22
|
* Initialize the index manager and load existing index if available
|
|
@@ -30,6 +34,10 @@ export declare class IndexManager {
|
|
|
30
34
|
* Requirements: 5.3 - When new documents are added THEN system SHALL append new chunks and vectors without rebuilding existing index
|
|
31
35
|
*/
|
|
32
36
|
addVectors(embeddings: EmbeddingResult[]): Promise<void>;
|
|
37
|
+
/**
|
|
38
|
+
* Add grouped embeddings by content type (for new grouped format)
|
|
39
|
+
*/
|
|
40
|
+
addGroupedEmbeddings(textEmbeddings: EmbeddingResult[], imageEmbeddings: EmbeddingResult[]): Promise<void>;
|
|
33
41
|
/**
|
|
34
42
|
* Rebuild the entire index from scratch
|
|
35
43
|
* Requirements: 5.2, 5.4 - Create full index rebuild functionality for model changes or document deletions
|
|
@@ -68,10 +76,18 @@ export declare class IndexManager {
|
|
|
68
76
|
* Save the vector index to disk
|
|
69
77
|
*/
|
|
70
78
|
saveIndex(): Promise<void>;
|
|
79
|
+
/**
|
|
80
|
+
* Create specialized indexes for text and image content when grouped data is available
|
|
81
|
+
*/
|
|
82
|
+
private createSpecializedIndexes;
|
|
83
|
+
/**
|
|
84
|
+
* Save index with content type grouping (for new grouped format)
|
|
85
|
+
*/
|
|
86
|
+
saveGroupedIndex(textEmbeddings: EmbeddingResult[], imageEmbeddings: EmbeddingResult[]): Promise<void>;
|
|
71
87
|
/**
|
|
72
88
|
* Search for similar vectors
|
|
73
89
|
*/
|
|
74
|
-
search(queryVector: Float32Array, k?: number): {
|
|
90
|
+
search(queryVector: Float32Array, k?: number, contentType?: 'text' | 'image' | 'combined'): {
|
|
75
91
|
embeddingIds: string[];
|
|
76
92
|
distances: number[];
|
|
77
93
|
};
|
package/dist/index-manager.js
CHANGED
|
@@ -1,26 +1,33 @@
|
|
|
1
1
|
import { VectorIndex } from './core/vector-index.js';
|
|
2
|
+
import { BinaryIndexFormat } from './core/binary-index-format.js';
|
|
2
3
|
import { openDatabase, getSystemInfo, setSystemInfo } from './core/db.js';
|
|
3
4
|
import { config, getModelDefaults } from './core/config.js';
|
|
4
5
|
export class IndexManager {
|
|
5
6
|
modelName;
|
|
6
7
|
vectorIndex;
|
|
8
|
+
textIndex;
|
|
9
|
+
imageIndex;
|
|
7
10
|
db = null;
|
|
8
11
|
indexPath;
|
|
9
12
|
dbPath;
|
|
10
13
|
isInitialized = false;
|
|
11
14
|
hashToEmbeddingId = new Map();
|
|
12
15
|
embeddingIdToHash = new Map();
|
|
16
|
+
groupedEmbeddings;
|
|
17
|
+
vectorIndexOptions;
|
|
13
18
|
constructor(indexPath, dbPath, dimensions, modelName) {
|
|
14
19
|
this.modelName = modelName;
|
|
15
20
|
this.indexPath = indexPath;
|
|
16
21
|
this.dbPath = dbPath;
|
|
17
|
-
//
|
|
18
|
-
this.
|
|
22
|
+
// Store options for creating specialized indexes
|
|
23
|
+
this.vectorIndexOptions = {
|
|
19
24
|
dimensions: dimensions,
|
|
20
25
|
maxElements: 100000, // Start with 100k capacity
|
|
21
26
|
efConstruction: 200,
|
|
22
27
|
M: 16
|
|
23
|
-
}
|
|
28
|
+
};
|
|
29
|
+
// Initialize with provided dimensions from config
|
|
30
|
+
this.vectorIndex = new VectorIndex(indexPath, this.vectorIndexOptions);
|
|
24
31
|
}
|
|
25
32
|
/**
|
|
26
33
|
* Initialize the index manager and load existing index if available
|
|
@@ -47,6 +54,8 @@ export class IndexManager {
|
|
|
47
54
|
// Only try to load existing index if not forcing recreation
|
|
48
55
|
console.log('Loading existing vector index...');
|
|
49
56
|
await this.vectorIndex.loadIndex();
|
|
57
|
+
// Check if the loaded index has grouped data and create specialized indexes
|
|
58
|
+
await this.createSpecializedIndexes();
|
|
50
59
|
}
|
|
51
60
|
// Always populate the embedding ID mapping from existing database entries
|
|
52
61
|
// This is needed both for new and existing indexes
|
|
@@ -55,7 +64,8 @@ export class IndexManager {
|
|
|
55
64
|
this.hashEmbeddingId(chunk.embedding_id); // This will populate the mapping
|
|
56
65
|
}
|
|
57
66
|
this.isInitialized = true;
|
|
58
|
-
|
|
67
|
+
const vectorCount = this.vectorIndex.getCurrentCount();
|
|
68
|
+
console.log(`Index manager initialized with ${vectorCount} vectors${this.textIndex && this.imageIndex ? ' (multi-graph mode)' : ''}`);
|
|
59
69
|
}
|
|
60
70
|
catch (error) {
|
|
61
71
|
throw new Error(`Failed to initialize index manager: ${error}`);
|
|
@@ -153,6 +163,31 @@ export class IndexManager {
|
|
|
153
163
|
throw new Error(`Failed to add vectors to index: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
154
164
|
}
|
|
155
165
|
}
|
|
166
|
+
/**
|
|
167
|
+
* Add grouped embeddings by content type (for new grouped format)
|
|
168
|
+
*/
|
|
169
|
+
async addGroupedEmbeddings(textEmbeddings, imageEmbeddings) {
|
|
170
|
+
if (!this.isInitialized) {
|
|
171
|
+
throw new Error('Index manager not initialized');
|
|
172
|
+
}
|
|
173
|
+
console.log(`addGroupedEmbeddings: text=${textEmbeddings.length}, image=${imageEmbeddings.length}`);
|
|
174
|
+
const allEmbeddings = [...textEmbeddings, ...imageEmbeddings];
|
|
175
|
+
if (allEmbeddings.length === 0) {
|
|
176
|
+
return;
|
|
177
|
+
}
|
|
178
|
+
try {
|
|
179
|
+
// Store grouped information for later saving
|
|
180
|
+
this.groupedEmbeddings = { text: textEmbeddings, image: imageEmbeddings };
|
|
181
|
+
console.log('addGroupedEmbeddings: stored grouped embeddings');
|
|
182
|
+
// Add all embeddings to the index (maintains current behavior)
|
|
183
|
+
await this.addVectors(allEmbeddings);
|
|
184
|
+
console.log('addGroupedEmbeddings: addVectors completed');
|
|
185
|
+
// The saveIndex method will now use grouped format if groupedEmbeddings exists
|
|
186
|
+
}
|
|
187
|
+
catch (error) {
|
|
188
|
+
throw new Error(`Failed to add grouped embeddings to index: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
156
191
|
/**
|
|
157
192
|
* Rebuild the entire index from scratch
|
|
158
193
|
* Requirements: 5.2, 5.4 - Create full index rebuild functionality for model changes or document deletions
|
|
@@ -349,16 +384,122 @@ export class IndexManager {
|
|
|
349
384
|
if (!this.isInitialized) {
|
|
350
385
|
throw new Error('Index manager not initialized');
|
|
351
386
|
}
|
|
352
|
-
|
|
387
|
+
// If we have grouped embeddings, save in grouped format
|
|
388
|
+
if (this.groupedEmbeddings) {
|
|
389
|
+
console.log('IndexManager: Saving in grouped format');
|
|
390
|
+
await this.saveGroupedIndex(this.groupedEmbeddings.text, this.groupedEmbeddings.image);
|
|
391
|
+
// Clear grouped data after saving
|
|
392
|
+
this.groupedEmbeddings = undefined;
|
|
393
|
+
}
|
|
394
|
+
else {
|
|
395
|
+
console.log('IndexManager: Saving in standard format');
|
|
396
|
+
await this.vectorIndex.saveIndex();
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
/**
|
|
400
|
+
* Create specialized indexes for text and image content when grouped data is available
|
|
401
|
+
*/
|
|
402
|
+
async createSpecializedIndexes() {
|
|
403
|
+
try {
|
|
404
|
+
// Load the index data to check if it has grouped information
|
|
405
|
+
const indexData = await BinaryIndexFormat.load(this.indexPath);
|
|
406
|
+
if (indexData.hasContentTypeGroups && indexData.textVectors && indexData.imageVectors) {
|
|
407
|
+
// Only create specialized indexes if we have both text and image vectors
|
|
408
|
+
// In text-only mode, textVectors would be populated but imageVectors empty
|
|
409
|
+
// In multimodal mode, both would be populated
|
|
410
|
+
const hasTextVectors = indexData.textVectors.length > 0;
|
|
411
|
+
const hasImageVectors = indexData.imageVectors.length > 0;
|
|
412
|
+
if (hasTextVectors && hasImageVectors) {
|
|
413
|
+
console.log('Creating specialized indexes for content type filtering...');
|
|
414
|
+
// Create text-only index
|
|
415
|
+
this.textIndex = new VectorIndex(`${this.indexPath}.text`, this.vectorIndexOptions);
|
|
416
|
+
await this.textIndex.initialize();
|
|
417
|
+
this.textIndex.addVectors(indexData.textVectors);
|
|
418
|
+
console.log(`✓ Text index created with ${indexData.textVectors.length} vectors`);
|
|
419
|
+
// Create image-only index
|
|
420
|
+
this.imageIndex = new VectorIndex(`${this.indexPath}.image`, this.vectorIndexOptions);
|
|
421
|
+
await this.imageIndex.initialize();
|
|
422
|
+
this.imageIndex.addVectors(indexData.imageVectors);
|
|
423
|
+
console.log(`✓ Image index created with ${indexData.imageVectors.length} vectors`);
|
|
424
|
+
console.log('✓ Specialized indexes ready for content type filtering');
|
|
425
|
+
}
|
|
426
|
+
else if (hasTextVectors) {
|
|
427
|
+
console.log('Text-only index detected - using combined index for all searches');
|
|
428
|
+
// In text-only mode, we don't need specialized indexes
|
|
429
|
+
// The combined index (vectorIndex) already contains all text vectors
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
catch (error) {
|
|
434
|
+
console.warn('Failed to create specialized indexes, falling back to combined index:', error);
|
|
435
|
+
// Continue without specialized indexes - search will still work with combined index
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
/**
|
|
439
|
+
* Save index with content type grouping (for new grouped format)
|
|
440
|
+
*/
|
|
441
|
+
async saveGroupedIndex(textEmbeddings, imageEmbeddings) {
|
|
442
|
+
if (!this.isInitialized) {
|
|
443
|
+
throw new Error('Index manager not initialized');
|
|
444
|
+
}
|
|
445
|
+
console.log(`saveGroupedIndex: text=${textEmbeddings.length}, image=${imageEmbeddings.length}`);
|
|
446
|
+
// Group vectors by content type
|
|
447
|
+
const textVectors = textEmbeddings.map((embedding) => ({
|
|
448
|
+
id: this.hashEmbeddingId(embedding.embedding_id),
|
|
449
|
+
vector: embedding.vector
|
|
450
|
+
}));
|
|
451
|
+
const imageVectors = imageEmbeddings.map((embedding) => ({
|
|
452
|
+
id: this.hashEmbeddingId(embedding.embedding_id),
|
|
453
|
+
vector: embedding.vector
|
|
454
|
+
}));
|
|
455
|
+
// Get index parameters
|
|
456
|
+
const options = this.vectorIndex.getOptions();
|
|
457
|
+
const allVectors = [...textVectors, ...imageVectors];
|
|
458
|
+
console.log(`saveGroupedIndex: dimensions=${options.dimensions}, totalVectors=${allVectors.length}`);
|
|
459
|
+
const indexData = {
|
|
460
|
+
dimensions: options.dimensions,
|
|
461
|
+
maxElements: options.maxElements,
|
|
462
|
+
M: options.M || 16,
|
|
463
|
+
efConstruction: options.efConstruction || 200,
|
|
464
|
+
seed: options.seed || 100,
|
|
465
|
+
currentSize: textVectors.length + imageVectors.length,
|
|
466
|
+
vectors: allVectors, // Required for backward compatibility
|
|
467
|
+
hasContentTypeGroups: true,
|
|
468
|
+
textVectors,
|
|
469
|
+
imageVectors
|
|
470
|
+
};
|
|
471
|
+
console.log('saveGroupedIndex: Calling BinaryIndexFormat.saveGrouped');
|
|
472
|
+
// Save using grouped format
|
|
473
|
+
await BinaryIndexFormat.saveGrouped(this.indexPath, indexData);
|
|
474
|
+
console.log(`✓ Saved grouped index with ${textVectors.length} text and ${imageVectors.length} image vectors`);
|
|
353
475
|
}
|
|
354
476
|
/**
|
|
355
477
|
* Search for similar vectors
|
|
356
478
|
*/
|
|
357
|
-
search(queryVector, k = 5) {
|
|
479
|
+
search(queryVector, k = 5, contentType) {
|
|
358
480
|
if (!this.isInitialized) {
|
|
359
481
|
throw new Error('Index manager not initialized');
|
|
360
482
|
}
|
|
361
|
-
|
|
483
|
+
// Select the appropriate index based on content type
|
|
484
|
+
let targetIndex;
|
|
485
|
+
// If we have specialized indexes (multimodal mode), use them for filtering
|
|
486
|
+
if (this.textIndex && this.imageIndex) {
|
|
487
|
+
if (contentType === 'text') {
|
|
488
|
+
targetIndex = this.textIndex;
|
|
489
|
+
}
|
|
490
|
+
else if (contentType === 'image') {
|
|
491
|
+
targetIndex = this.imageIndex;
|
|
492
|
+
}
|
|
493
|
+
else {
|
|
494
|
+
// 'combined' or undefined
|
|
495
|
+
targetIndex = this.vectorIndex;
|
|
496
|
+
}
|
|
497
|
+
}
|
|
498
|
+
else {
|
|
499
|
+
// No specialized indexes (text-only mode) - ignore contentType and use combined index
|
|
500
|
+
targetIndex = this.vectorIndex;
|
|
501
|
+
}
|
|
502
|
+
const results = targetIndex.search(queryVector, k);
|
|
362
503
|
// Convert numeric IDs back to embedding IDs
|
|
363
504
|
const embeddingIds = results.neighbors.map(id => this.unhashEmbeddingId(id));
|
|
364
505
|
return {
|