rag-lite-ts 2.0.5 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,14 +11,14 @@ import { EXIT_CODES, ConfigurationError } from '../core/config.js';
11
11
  async function validateModeConfiguration(options) {
12
12
  const mode = options.mode || 'text';
13
13
  const model = options.embeddingModel;
14
- const rerankingStrategy = options.rerankingStrategy;
15
14
  // Define supported models for each mode
16
15
  const textModels = [
17
16
  'sentence-transformers/all-MiniLM-L6-v2',
18
17
  'Xenova/all-mpnet-base-v2'
19
18
  ];
20
19
  const multimodalModels = [
21
- 'Xenova/clip-vit-base-patch32'
20
+ 'Xenova/clip-vit-base-patch32',
21
+ 'Xenova/clip-vit-base-patch16'
22
22
  ];
23
23
  // Validate model compatibility with mode
24
24
  if (model) {
@@ -64,35 +64,6 @@ async function validateModeConfiguration(options) {
64
64
  }
65
65
  }
66
66
  }
67
- // Validate reranking strategy compatibility with mode
68
- if (rerankingStrategy) {
69
- const textStrategies = ['cross-encoder', 'disabled'];
70
- const multimodalStrategies = ['text-derived', 'metadata', 'disabled'];
71
- if (mode === 'text' && !textStrategies.includes(rerankingStrategy)) {
72
- throw new ConfigurationError(`Reranking strategy '${rerankingStrategy}' is not supported for text mode.\n` +
73
- `\n` +
74
- `Supported strategies for text mode:\n` +
75
- ` cross-encoder Use cross-encoder model for reranking (default)\n` +
76
- ` disabled No reranking, use vector similarity only\n` +
77
- `\n` +
78
- `Examples:\n` +
79
- ` raglite ingest <path> --mode text --rerank-strategy cross-encoder\n` +
80
- ` raglite ingest <path> --mode text --rerank-strategy disabled\n`, EXIT_CODES.INVALID_ARGUMENTS);
81
- }
82
- if (mode === 'multimodal' && !multimodalStrategies.includes(rerankingStrategy)) {
83
- throw new ConfigurationError(`Reranking strategy '${rerankingStrategy}' is not supported for multimodal mode.\n` +
84
- `\n` +
85
- `Supported strategies for multimodal mode:\n` +
86
- ` text-derived Convert images to text, then use cross-encoder (default)\n` +
87
- ` metadata Use filename and metadata-based scoring\n` +
88
- ` disabled No reranking, use vector similarity only\n` +
89
- `\n` +
90
- `Examples:\n` +
91
- ` raglite ingest <path> --mode multimodal --rerank-strategy text-derived\n` +
92
- ` raglite ingest <path> --mode multimodal --rerank-strategy metadata\n` +
93
- ` raglite ingest <path> --mode multimodal --rerank-strategy disabled\n`, EXIT_CODES.INVALID_ARGUMENTS);
94
- }
95
- }
96
67
  // Log the final configuration
97
68
  console.log('✅ Mode configuration validated successfully');
98
69
  if (mode !== 'text') {
@@ -101,9 +72,6 @@ async function validateModeConfiguration(options) {
101
72
  if (model) {
102
73
  console.log(` Model: ${model}`);
103
74
  }
104
- if (rerankingStrategy) {
105
- console.log(` Reranking: ${rerankingStrategy}`);
106
- }
107
75
  }
108
76
  /**
109
77
  * Run document ingestion from CLI
@@ -191,10 +159,6 @@ export async function runIngest(path, options = {}) {
191
159
  factoryOptions.mode = options.mode;
192
160
  console.log(`Using processing mode: ${options.mode}`);
193
161
  }
194
- if (options['rerank-strategy']) {
195
- factoryOptions.rerankingStrategy = options['rerank-strategy'];
196
- console.log(`Using reranking strategy: ${options['rerank-strategy']}`);
197
- }
198
162
  if (options.rebuildIfNeeded) {
199
163
  factoryOptions.forceRebuild = true;
200
164
  console.log('Force rebuild enabled due to rebuildIfNeeded option');
@@ -234,7 +198,7 @@ export async function runIngest(path, options = {}) {
234
198
  showProgress: true,
235
199
  maxWaitMs: 15000 // Longer timeout for ingestion
236
200
  });
237
- const result = await pipeline.ingestPath(resolvedPath);
201
+ const result = await pipeline.ingestPath(resolvedPath, { mode: factoryOptions.mode });
238
202
  // Display final results
239
203
  console.log('\n' + '='.repeat(50));
240
204
  console.log('INGESTION SUMMARY');
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * Run search from CLI
3
- * @param query - Search query string
3
+ * @param query - Search query string or image file path
4
4
  * @param options - CLI options including top-k and rerank settings
5
5
  */
6
6
  export declare function runSearch(query: string, options?: Record<string, any>): Promise<void>;
@@ -1,10 +1,36 @@
1
- import { existsSync } from 'fs';
1
+ import { existsSync, statSync } from 'fs';
2
+ import { extname } from 'path';
2
3
  import { SearchFactory } from '../factories/search-factory.js';
3
4
  import { withCLIDatabaseAccess, setupCLICleanup } from '../core/cli-database-utils.js';
4
5
  import { config, EXIT_CODES, ConfigurationError } from '../core/config.js';
6
/**
 * Detect whether a search query refers to an existing image file on disk.
 *
 * A query counts as an image only when it resolves to a regular file (not a
 * directory) whose extension, compared case-insensitively, is one of
 * .jpg, .jpeg, .png, .gif, .webp or .bmp. Only the path is inspected; the
 * file contents are never read.
 *
 * @param query - Query string to check
 * @returns True if query is a path to an existing file with an image extension
 */
function isImageFile(query) {
    let stats;
    try {
        // One stat answers both "does it exist" and "is it a file". A missing
        // or unreadable path throws here and is treated as "not an image",
        // which avoids a separate existence check that could go stale.
        stats = statSync(query);
    }
    catch {
        return false;
    }
    // Directories (even ones named like "photos.jpg") are not image queries
    if (!stats.isFile()) {
        return false;
    }
    // Decide by extension only, ignoring case (".PNG" is accepted)
    const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'];
    return imageExtensions.includes(extname(query).toLowerCase());
}
5
31
  /**
6
32
  * Run search from CLI
7
- * @param query - Search query string
33
+ * @param query - Search query string or image file path
8
34
  * @param options - CLI options including top-k and rerank settings
9
35
  */
10
36
  export async function runSearch(query, options = {}) {
@@ -16,14 +42,18 @@ export async function runSearch(query, options = {}) {
16
42
  console.error('Error: Search query cannot be empty');
17
43
  console.error('');
18
44
  console.error('Usage: raglite search <query>');
45
+ console.error(' raglite search <image-path>');
19
46
  console.error('');
20
47
  console.error('Examples:');
21
48
  console.error(' raglite search "machine learning"');
22
49
  console.error(' raglite search "how to install"');
50
+ console.error(' raglite search ./photo.jpg');
23
51
  process.exit(EXIT_CODES.INVALID_ARGUMENTS);
24
52
  }
25
- // Validate query length
26
- if (query.trim().length > 500) {
53
+ // Detect if query is an image file
54
+ const isImage = isImageFile(query);
55
+ // Validate query length for text queries
56
+ if (!isImage && query.trim().length > 500) {
27
57
  console.error('Error: Search query is too long (maximum 500 characters)');
28
58
  console.error('');
29
59
  console.error('Please use a shorter, more specific query.');
@@ -53,40 +83,102 @@ export async function runSearch(query, options = {}) {
53
83
  console.error('If the problem persists, check that your documents were processed correctly.');
54
84
  process.exit(EXIT_CODES.INDEX_ERROR);
55
85
  }
56
- console.log(`Searching for: "${query}"`);
86
+ // Display search type
87
+ if (isImage) {
88
+ console.log(`Searching with image: "${query}"`);
89
+ }
90
+ else {
91
+ console.log(`Searching for: "${query}"`);
92
+ }
57
93
  console.log('');
58
94
  // Setup graceful cleanup
59
95
  setupCLICleanup(effectiveConfig.db_file);
60
96
  // Initialize search engine using polymorphic factory with database protection
61
97
  let searchEngine;
98
+ let embedder;
62
99
  try {
63
100
  // Create search engine using SearchFactory (auto-detects mode)
64
101
  searchEngine = await withCLIDatabaseAccess(effectiveConfig.db_file, () => SearchFactory.create(effectiveConfig.index_file, effectiveConfig.db_file), {
65
102
  commandName: 'Search command',
66
103
  showProgress: true
67
104
  });
105
+ // For image queries, we need to check if the mode supports images
106
+ if (isImage) {
107
+ // Get system info to check mode
108
+ const { ModeDetectionService } = await import('../core/mode-detection-service.js');
109
+ const modeService = new ModeDetectionService(effectiveConfig.db_file);
110
+ const systemInfo = await modeService.detectMode();
111
+ if (systemInfo.mode !== 'multimodal') {
112
+ console.error('Error: Image search is only supported in multimodal mode');
113
+ console.error('');
114
+ console.error('Your database is configured for text-only mode.');
115
+ console.error('To enable image search:');
116
+ console.error('1. Re-ingest your documents with multimodal mode:');
117
+ console.error(' raglite ingest <path> --mode multimodal');
118
+ console.error('2. Then search with images:');
119
+ console.error(' raglite search ./photo.jpg');
120
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
121
+ }
122
+ // Create embedder for image embedding
123
+ const { createEmbedder } = await import('../core/embedder-factory.js');
124
+ embedder = await createEmbedder(systemInfo.modelName);
125
+ // Check if embedder supports images
126
+ const { supportsImages } = await import('../core/universal-embedder.js');
127
+ if (!supportsImages(embedder)) {
128
+ console.error('Error: The current model does not support image embedding');
129
+ console.error('');
130
+ console.error(`Model: ${systemInfo.modelName}`);
131
+ console.error('Image search requires a multimodal model like CLIP.');
132
+ process.exit(EXIT_CODES.MODEL_ERROR);
133
+ }
134
+ }
68
135
  // Prepare search options
69
136
  const searchOptions = {};
70
137
  if (options['top-k'] !== undefined) {
71
138
  searchOptions.top_k = options['top-k'];
72
139
  }
73
- if (options.rerank !== undefined) {
74
- searchOptions.rerank = options.rerank;
75
- }
76
- // Perform search
77
- const startTime = Date.now();
78
- let results = await searchEngine.search(query, searchOptions);
79
- const searchTime = Date.now() - startTime;
80
- // Apply content type filter if specified
140
+ // Set content type filter for search-level filtering
81
141
  const contentTypeFilter = options['content-type'];
82
142
  if (contentTypeFilter && contentTypeFilter !== 'all') {
83
- const originalCount = results.length;
84
- results = results.filter(r => r.contentType === contentTypeFilter);
85
- if (results.length < originalCount) {
86
- console.log(`Filtered to ${results.length} ${contentTypeFilter} result${results.length === 1 ? '' : 's'} (from ${originalCount} total)`);
87
- console.log('');
143
+ searchOptions.contentType = contentTypeFilter;
144
+ }
145
+ // Phase 2: Disable reranking for image-to-image searches to preserve visual similarity
146
+ let rerankingForciblyDisabled = false;
147
+ if (isImage && embedder) {
148
+ // Force disable reranking for image searches, regardless of user setting
149
+ searchOptions.rerank = false;
150
+ rerankingForciblyDisabled = true;
151
+ // Warn user if they tried to enable reranking for image search
152
+ if (options.rerank === true) {
153
+ console.warn('⚠️ Reranking is disabled for image-to-image search to preserve visual similarity.');
154
+ console.warn(' Image-to-image search uses CLIP embeddings for direct visual matching.');
155
+ console.warn(' For text-to-image search, use: raglite search "description" --rerank');
156
+ console.warn('');
157
+ }
158
+ }
159
+ else {
160
+ // For text searches, use user setting (defaults to false from Phase 1)
161
+ if (options.rerank !== undefined) {
162
+ searchOptions.rerank = options.rerank;
88
163
  }
89
164
  }
165
+ // Track whether reranking will actually be used in this search
166
+ const rerankingUsed = searchOptions.rerank === true;
167
+ // Perform search
168
+ const startTime = Date.now();
169
+ let results;
170
+ if (isImage && embedder) {
171
+ // Image-based search: embed the image and search with the vector
172
+ console.log('Embedding image...');
173
+ const imageEmbedding = await embedder.embedImage(query);
174
+ console.log('Searching with image embedding...');
175
+ results = await searchEngine.searchWithVector(imageEmbedding.vector, searchOptions);
176
+ }
177
+ else {
178
+ // Text-based search
179
+ results = await searchEngine.search(query, searchOptions);
180
+ }
181
+ const searchTime = Date.now() - startTime;
90
182
  // Display results
91
183
  if (results.length === 0) {
92
184
  console.log('No results found.');
@@ -129,13 +221,25 @@ export async function runSearch(query, options = {}) {
129
221
  console.log('─'.repeat(50));
130
222
  console.log(`Search completed in ${searchTime}ms`);
131
223
  console.log(`Searched ${stats.totalChunks} chunks`);
132
- if (stats.rerankingEnabled) {
224
+ if (rerankingUsed) {
133
225
  console.log('Reranking: enabled');
134
226
  }
227
+ else if (rerankingForciblyDisabled) {
228
+ console.log('Reranking: disabled');
229
+ }
230
+ else if (stats.rerankingEnabled) {
231
+ console.log('Reranking: available (not used)');
232
+ }
233
+ else {
234
+ console.log('Reranking: disabled');
235
+ }
135
236
  }
136
237
  }
137
238
  finally {
138
239
  // Cleanup resources
240
+ if (embedder) {
241
+ await embedder.cleanup();
242
+ }
139
243
  if (searchEngine) {
140
244
  await searchEngine.cleanup();
141
245
  }
package/dist/cli.js CHANGED
@@ -18,61 +18,60 @@ function showVersion() {
18
18
  * Display help information
19
19
  */
20
20
  function showHelp() {
21
- console.log(`
22
- RAG-lite TS v${packageJson.version}
23
- Local-first TypeScript retrieval engine for semantic search
24
-
25
- Usage:
26
- raglite <command> [options]
27
-
28
- Commands:
29
- ingest <path> Ingest documents from file or directory
30
- search <query> Search indexed documents
31
- rebuild Rebuild the vector index
32
- version Show version information
33
- help Show this help message
34
-
35
- Examples:
36
- raglite ingest ./docs/ # Ingest all .md/.txt/.docx/.pdf files in docs/
37
- raglite ingest ./readme.md # Ingest single file
38
- raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2 # Use higher quality model
39
- raglite ingest ./docs/ --mode multimodal # Enable multimodal processing
40
- raglite ingest ./docs/ --mode multimodal --rerank-strategy metadata # Use metadata reranking
41
- raglite ingest ./docs/ --path-strategy relative --path-base /project # Use relative paths
42
- raglite search "machine learning" # Search for documents about machine learning
43
- raglite search "API documentation" --top-k 10 # Get top 10 results
44
- raglite search "red car" --content-type image # Search only image results
45
-
46
- raglite rebuild # Rebuild the entire index
47
-
48
- Options for search:
49
- --top-k <number> Number of results to return (default: 10)
50
- --rerank Enable reranking for better results
51
- --no-rerank Disable reranking
52
- --content-type <type> Filter results by content type: 'text', 'image', or 'all' (default: all)
53
-
54
- Options for ingest:
55
- --model <name> Use specific embedding model
56
- --mode <mode> Processing mode: 'text' (default) or 'multimodal'
57
- --rerank-strategy <strategy> Reranking strategy for multimodal mode
58
- --rebuild-if-needed Automatically rebuild if model mismatch detected (WARNING: rebuilds entire index)
59
- --path-strategy <strategy> Path storage strategy: 'relative' (default) or 'absolute'
60
- --path-base <path> Base directory for relative paths (defaults to current directory)
61
-
62
- Available models:
63
- Text mode:
64
- sentence-transformers/all-MiniLM-L6-v2 (384 dim, fast, default)
65
- Xenova/all-mpnet-base-v2 (768 dim, higher quality)
66
- Multimodal mode:
67
- Xenova/clip-vit-base-patch32 (512 dim, faster, default)
68
- Xenova/clip-vit-base-patch16 (512 dim, more accurate, slower)
69
-
70
- Available reranking strategies (multimodal mode):
71
- text-derived Use image-to-text conversion + cross-encoder (default)
72
- metadata Use filename and metadata-based scoring
73
- disabled No reranking, use vector similarity only
74
-
75
- For more information, visit: https://github.com/your-repo/rag-lite-ts
21
+ console.log(`
22
+ RAG-lite TS v${packageJson.version}
23
+ Local-first TypeScript retrieval engine for semantic search
24
+
25
+ Usage:
26
+ raglite <command> [options]
27
+
28
+ Commands:
29
+ ingest <path> Ingest documents from file or directory
30
+ search <query> Search indexed documents (text or image)
31
+ rebuild Rebuild the vector index
32
+ version Show version information
33
+ help Show this help message
34
+
35
+ Examples:
36
+ raglite ingest ./docs/ # Ingest all .md/.txt/.docx/.pdf files in docs/
37
+ raglite ingest ./readme.md # Ingest single file
38
+ raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2 # Use higher quality model
39
+ raglite ingest ./docs/ --mode multimodal # Enable multimodal processing
40
+ raglite ingest ./docs/ --path-strategy relative --path-base /project # Use relative paths
41
+ raglite search "machine learning" # Search for documents about machine learning
42
+ raglite search "API documentation" --top-k 10 # Get top 10 results
43
+ raglite search "red car" --content-type image # Search only image results
44
+ raglite search ./photo.jpg # Search with image (multimodal mode only)
45
+ raglite search ./image.png --top-k 5 # Find similar images
46
+
47
+ raglite rebuild # Rebuild the entire index
48
+
49
+ Options for search:
50
+ --top-k <number> Number of results to return (default: 10)
51
+ --rerank Enable reranking for better results
52
+ --no-rerank Disable reranking
53
+ --content-type <type> Filter results by content type: 'text', 'image', or 'all' (default: all)
54
+
55
+ Options for ingest:
56
+ --model <name> Use specific embedding model
57
+ --mode <mode> Processing mode: 'text' (default) or 'multimodal'
58
+ --rebuild-if-needed Automatically rebuild if model mismatch detected (WARNING: rebuilds entire index)
59
+ --path-strategy <strategy> Path storage strategy: 'relative' (default) or 'absolute'
60
+ --path-base <path> Base directory for relative paths (defaults to current directory)
61
+
62
+ Available models:
63
+ Text mode:
64
+ sentence-transformers/all-MiniLM-L6-v2 (384 dim, fast, default)
65
+ Xenova/all-mpnet-base-v2 (768 dim, higher quality)
66
+ Multimodal mode:
67
+ Xenova/clip-vit-base-patch32 (512 dim, faster, default)
68
+ Xenova/clip-vit-base-patch16 (512 dim, more accurate, slower)
69
+
70
+ Available reranking strategies (multimodal mode):
71
+ text-derived Use image-to-text conversion + cross-encoder (default)
72
+ disabled No reranking, use vector similarity only
73
+
74
+ For more information, visit: https://github.com/your-repo/rag-lite-ts
76
75
  `);
77
76
  }
78
77
  /**
@@ -98,9 +97,16 @@ function parseArgs() {
98
97
  const arg = args[i];
99
98
  if (arg.startsWith('--')) {
100
99
  const optionName = arg.slice(2);
101
- // Handle boolean flags
100
+ // Handle boolean flags with optional values
102
101
  if (optionName === 'rerank') {
103
- options.rerank = true;
102
+ const nextArg = args[i + 1];
103
+ if (nextArg && (nextArg === 'true' || nextArg === 'false')) {
104
+ options.rerank = nextArg === 'true';
105
+ i++; // Skip the next argument as it's the value
106
+ }
107
+ else {
108
+ options.rerank = true;
109
+ }
104
110
  }
105
111
  else if (optionName === 'no-rerank') {
106
112
  options.rerank = false;
@@ -136,6 +142,17 @@ function parseArgs() {
136
142
  * Validate command line arguments
137
143
  */
138
144
  function validateArgs(command, args, options) {
145
+ // Phase 3: Reject the removed --rerank-strategy option
146
+ if (options['rerank-strategy'] !== undefined) {
147
+ console.error('Error: --rerank-strategy option has been removed in this version');
148
+ console.error('');
149
+ console.error('Reranking strategy is now automatically selected based on mode:');
150
+ console.error(' Text mode: cross-encoder (or disabled)');
151
+ console.error(' Multimodal mode: text-derived (or disabled)');
152
+ console.error('');
153
+ console.error('Use --rerank or --no-rerank to control reranking instead.');
154
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
155
+ }
139
156
  switch (command) {
140
157
  case 'ingest':
141
158
  if (args.length === 0) {
@@ -148,12 +165,10 @@ function validateArgs(command, args, options) {
148
165
  console.error(' raglite ingest ./readme.md # Ingest single file');
149
166
  console.error(' raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2 # Use higher quality model');
150
167
  console.error(' raglite ingest ./docs/ --mode multimodal # Enable multimodal processing');
151
- console.error(' raglite ingest ./docs/ --mode multimodal --rerank-strategy metadata # Use metadata reranking');
152
168
  console.error('');
153
169
  console.error('Options:');
154
170
  console.error(' --model <name> Use specific embedding model');
155
171
  console.error(' --mode <mode> Processing mode: text (default) or multimodal');
156
- console.error(' --rerank-strategy <strategy> Reranking strategy for multimodal mode');
157
172
  console.error(' --rebuild-if-needed Automatically rebuild if model mismatch detected');
158
173
  console.error('');
159
174
  console.error('The path can be either a file (.md or .txt) or a directory.');
@@ -165,12 +180,15 @@ function validateArgs(command, args, options) {
165
180
  console.error('Error: search command requires a query argument');
166
181
  console.error('');
167
182
  console.error('Usage: raglite search <query> [options]');
183
+ console.error(' raglite search <image-path> [options]');
168
184
  console.error('');
169
185
  console.error('Examples:');
170
186
  console.error(' raglite search "machine learning"');
171
187
  console.error(' raglite search "API documentation" --top-k 10');
172
188
  console.error(' raglite search "tutorial" --rerank');
173
189
  console.error(' raglite search "red car" --content-type image');
190
+ console.error(' raglite search ./photo.jpg # Image search (multimodal mode)');
191
+ console.error(' raglite search ./image.png --top-k 5 # Find similar images');
174
192
  console.error('');
175
193
  console.error('Options:');
176
194
  console.error(' --top-k <number> Number of results to return (default: 10)');
@@ -269,41 +287,6 @@ function validateArgs(command, args, options) {
269
287
  process.exit(EXIT_CODES.INVALID_ARGUMENTS);
270
288
  }
271
289
  }
272
- // Validate rerank-strategy option (only for ingest command with multimodal mode)
273
- if (options['rerank-strategy'] !== undefined) {
274
- if (command !== 'ingest') {
275
- console.error(`Error: --rerank-strategy option is only available for the 'ingest' command`);
276
- console.error('');
277
- console.error('Reranking strategy is configured during ingestion and used automatically during search.');
278
- process.exit(EXIT_CODES.INVALID_ARGUMENTS);
279
- }
280
- const mode = options.mode || 'text';
281
- if (mode !== 'multimodal') {
282
- console.error(`Error: --rerank-strategy option is only available in multimodal mode`);
283
- console.error('');
284
- console.error('To use reranking strategies, specify --mode multimodal');
285
- console.error('');
286
- console.error('Examples:');
287
- console.error(' raglite ingest ./docs/ --mode multimodal --rerank-strategy text-derived');
288
- console.error(' raglite ingest ./docs/ --mode multimodal --rerank-strategy metadata');
289
- process.exit(EXIT_CODES.INVALID_ARGUMENTS);
290
- }
291
- const supportedStrategies = ['text-derived', 'metadata', 'disabled'];
292
- if (!supportedStrategies.includes(options['rerank-strategy'])) {
293
- console.error(`Error: Unsupported reranking strategy '${options['rerank-strategy']}'`);
294
- console.error('');
295
- console.error('Supported strategies for multimodal mode:');
296
- console.error(' text-derived Convert images to text, then use cross-encoder (default)');
297
- console.error(' metadata Use filename and metadata-based scoring');
298
- console.error(' disabled No reranking, use vector similarity only');
299
- console.error('');
300
- console.error('Examples:');
301
- console.error(' --rerank-strategy text-derived');
302
- console.error(' --rerank-strategy metadata');
303
- console.error(' --rerank-strategy disabled');
304
- process.exit(EXIT_CODES.INVALID_ARGUMENTS);
305
- }
306
- }
307
290
  // Validate model option (only for ingest command)
308
291
  if (options.model !== undefined) {
309
292
  if (command !== 'ingest') {
@@ -25,10 +25,19 @@ export interface BinaryIndexData {
25
25
  id: number;
26
26
  vector: Float32Array;
27
27
  }>;
28
+ hasContentTypeGroups?: boolean;
29
+ textVectors?: Array<{
30
+ id: number;
31
+ vector: Float32Array;
32
+ }>;
33
+ imageVectors?: Array<{
34
+ id: number;
35
+ vector: Float32Array;
36
+ }>;
28
37
  }
29
38
  export declare class BinaryIndexFormat {
30
39
  /**
31
- * Save index data to binary format
40
+ * Save index data to binary format (original format for backward compatibility)
32
41
  *
33
42
  * File structure:
34
43
  * - Header (24 bytes): dimensions, maxElements, M, efConstruction, seed, currentSize
@@ -39,7 +48,24 @@ export declare class BinaryIndexFormat {
39
48
  */
40
49
  static save(indexPath: string, data: BinaryIndexData): Promise<void>;
41
50
  /**
42
- * Load index data from binary format
51
+ * Save index data to grouped binary format
52
+ *
53
+ * File structure:
54
+ * - Extended Header (44 bytes as itemized below; NOTE(review): previously stated as 40 bytes - confirm against the writer):
55
+ * - Original 6 fields (24 bytes)
56
+ * - hasGroups flag (4 bytes)
57
+ * - textOffset (4 bytes)
58
+ * - textCount (4 bytes)
59
+ * - imageOffset (4 bytes)
60
+ * - imageCount (4 bytes)
61
+ * - Data section: [text vectors...][image vectors...]
62
+ *
63
+ * @param indexPath Path to save the binary index file
64
+ * @param data Index data to serialize
65
+ */
66
+ static saveGrouped(indexPath: string, data: BinaryIndexData): Promise<void>;
67
+ /**
68
+ * Load index data from binary format (supports both original and grouped formats)
43
69
  *
44
70
  * Uses zero-copy Float32Array views for efficient loading.
45
71
  * Copies the views to ensure data persistence after buffer lifecycle.