rag-lite-ts 1.0.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. package/README.md +606 -93
  2. package/dist/cli/indexer.js +192 -4
  3. package/dist/cli/search.js +50 -11
  4. package/dist/cli.js +183 -26
  5. package/dist/core/abstract-embedder.d.ts +125 -0
  6. package/dist/core/abstract-embedder.js +264 -0
  7. package/dist/core/actionable-error-messages.d.ts +60 -0
  8. package/dist/core/actionable-error-messages.js +397 -0
  9. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  10. package/dist/core/batch-processing-optimizer.js +541 -0
  11. package/dist/core/chunker.d.ts +2 -0
  12. package/dist/core/cli-database-utils.d.ts +53 -0
  13. package/dist/core/cli-database-utils.js +239 -0
  14. package/dist/core/config.js +10 -3
  15. package/dist/core/content-errors.d.ts +111 -0
  16. package/dist/core/content-errors.js +362 -0
  17. package/dist/core/content-manager.d.ts +343 -0
  18. package/dist/core/content-manager.js +1504 -0
  19. package/dist/core/content-performance-optimizer.d.ts +150 -0
  20. package/dist/core/content-performance-optimizer.js +516 -0
  21. package/dist/core/content-resolver.d.ts +104 -0
  22. package/dist/core/content-resolver.js +285 -0
  23. package/dist/core/cross-modal-search.d.ts +164 -0
  24. package/dist/core/cross-modal-search.js +342 -0
  25. package/dist/core/database-connection-manager.d.ts +109 -0
  26. package/dist/core/database-connection-manager.js +304 -0
  27. package/dist/core/db.d.ts +141 -2
  28. package/dist/core/db.js +631 -89
  29. package/dist/core/embedder-factory.d.ts +176 -0
  30. package/dist/core/embedder-factory.js +338 -0
  31. package/dist/core/index.d.ts +3 -1
  32. package/dist/core/index.js +4 -1
  33. package/dist/core/ingestion.d.ts +85 -15
  34. package/dist/core/ingestion.js +510 -45
  35. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  36. package/dist/core/lazy-dependency-loader.js +453 -0
  37. package/dist/core/mode-detection-service.d.ts +150 -0
  38. package/dist/core/mode-detection-service.js +565 -0
  39. package/dist/core/mode-model-validator.d.ts +92 -0
  40. package/dist/core/mode-model-validator.js +203 -0
  41. package/dist/core/model-registry.d.ts +120 -0
  42. package/dist/core/model-registry.js +415 -0
  43. package/dist/core/model-validator.d.ts +217 -0
  44. package/dist/core/model-validator.js +782 -0
  45. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  46. package/dist/core/polymorphic-search-factory.js +344 -0
  47. package/dist/core/raglite-paths.d.ts +121 -0
  48. package/dist/core/raglite-paths.js +145 -0
  49. package/dist/core/reranking-config.d.ts +42 -0
  50. package/dist/core/reranking-config.js +156 -0
  51. package/dist/core/reranking-factory.d.ts +92 -0
  52. package/dist/core/reranking-factory.js +591 -0
  53. package/dist/core/reranking-strategies.d.ts +325 -0
  54. package/dist/core/reranking-strategies.js +720 -0
  55. package/dist/core/resource-cleanup.d.ts +163 -0
  56. package/dist/core/resource-cleanup.js +371 -0
  57. package/dist/core/resource-manager.d.ts +212 -0
  58. package/dist/core/resource-manager.js +564 -0
  59. package/dist/core/search.d.ts +28 -1
  60. package/dist/core/search.js +83 -5
  61. package/dist/core/streaming-operations.d.ts +145 -0
  62. package/dist/core/streaming-operations.js +409 -0
  63. package/dist/core/types.d.ts +3 -0
  64. package/dist/core/universal-embedder.d.ts +177 -0
  65. package/dist/core/universal-embedder.js +139 -0
  66. package/dist/core/validation-messages.d.ts +99 -0
  67. package/dist/core/validation-messages.js +334 -0
  68. package/dist/core/vector-index.js +7 -8
  69. package/dist/factories/index.d.ts +1 -1
  70. package/dist/factories/text-factory.d.ts +128 -34
  71. package/dist/factories/text-factory.js +346 -97
  72. package/dist/file-processor.d.ts +88 -2
  73. package/dist/file-processor.js +720 -17
  74. package/dist/index.d.ts +9 -0
  75. package/dist/index.js +11 -0
  76. package/dist/ingestion.d.ts +16 -0
  77. package/dist/ingestion.js +21 -0
  78. package/dist/mcp-server.d.ts +35 -3
  79. package/dist/mcp-server.js +1107 -31
  80. package/dist/multimodal/clip-embedder.d.ts +314 -0
  81. package/dist/multimodal/clip-embedder.js +945 -0
  82. package/dist/multimodal/index.d.ts +6 -0
  83. package/dist/multimodal/index.js +6 -0
  84. package/dist/run-error-recovery-tests.d.ts +7 -0
  85. package/dist/run-error-recovery-tests.js +101 -0
  86. package/dist/search.d.ts +26 -0
  87. package/dist/search.js +54 -1
  88. package/dist/test-utils.d.ts +8 -26
  89. package/dist/text/chunker.d.ts +1 -0
  90. package/dist/text/embedder.js +15 -8
  91. package/dist/text/index.d.ts +1 -0
  92. package/dist/text/index.js +1 -0
  93. package/dist/text/reranker.d.ts +1 -2
  94. package/dist/text/reranker.js +17 -47
  95. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  96. package/dist/text/sentence-transformer-embedder.js +340 -0
  97. package/dist/types.d.ts +39 -0
  98. package/dist/utils/vector-math.d.ts +31 -0
  99. package/dist/utils/vector-math.js +70 -0
  100. package/package.json +15 -3
  101. package/dist/api-errors.d.ts.map +0 -1
  102. package/dist/api-errors.js.map +0 -1
  103. package/dist/cli/indexer.d.ts.map +0 -1
  104. package/dist/cli/indexer.js.map +0 -1
  105. package/dist/cli/search.d.ts.map +0 -1
  106. package/dist/cli/search.js.map +0 -1
  107. package/dist/cli.d.ts.map +0 -1
  108. package/dist/cli.js.map +0 -1
  109. package/dist/config.d.ts.map +0 -1
  110. package/dist/config.js.map +0 -1
  111. package/dist/core/adapters.d.ts.map +0 -1
  112. package/dist/core/adapters.js.map +0 -1
  113. package/dist/core/chunker.d.ts.map +0 -1
  114. package/dist/core/chunker.js.map +0 -1
  115. package/dist/core/config.d.ts.map +0 -1
  116. package/dist/core/config.js.map +0 -1
  117. package/dist/core/db.d.ts.map +0 -1
  118. package/dist/core/db.js.map +0 -1
  119. package/dist/core/error-handler.d.ts.map +0 -1
  120. package/dist/core/error-handler.js.map +0 -1
  121. package/dist/core/index.d.ts.map +0 -1
  122. package/dist/core/index.js.map +0 -1
  123. package/dist/core/ingestion.d.ts.map +0 -1
  124. package/dist/core/ingestion.js.map +0 -1
  125. package/dist/core/interfaces.d.ts.map +0 -1
  126. package/dist/core/interfaces.js.map +0 -1
  127. package/dist/core/path-manager.d.ts.map +0 -1
  128. package/dist/core/path-manager.js.map +0 -1
  129. package/dist/core/search-example.d.ts +0 -25
  130. package/dist/core/search-example.d.ts.map +0 -1
  131. package/dist/core/search-example.js +0 -138
  132. package/dist/core/search-example.js.map +0 -1
  133. package/dist/core/search-pipeline-example.d.ts +0 -21
  134. package/dist/core/search-pipeline-example.d.ts.map +0 -1
  135. package/dist/core/search-pipeline-example.js +0 -188
  136. package/dist/core/search-pipeline-example.js.map +0 -1
  137. package/dist/core/search-pipeline.d.ts.map +0 -1
  138. package/dist/core/search-pipeline.js.map +0 -1
  139. package/dist/core/search.d.ts.map +0 -1
  140. package/dist/core/search.js.map +0 -1
  141. package/dist/core/types.d.ts.map +0 -1
  142. package/dist/core/types.js.map +0 -1
  143. package/dist/core/vector-index.d.ts.map +0 -1
  144. package/dist/core/vector-index.js.map +0 -1
  145. package/dist/dom-polyfills.d.ts.map +0 -1
  146. package/dist/dom-polyfills.js.map +0 -1
  147. package/dist/examples/clean-api-examples.d.ts +0 -44
  148. package/dist/examples/clean-api-examples.d.ts.map +0 -1
  149. package/dist/examples/clean-api-examples.js +0 -206
  150. package/dist/examples/clean-api-examples.js.map +0 -1
  151. package/dist/factories/index.d.ts.map +0 -1
  152. package/dist/factories/index.js.map +0 -1
  153. package/dist/factories/text-factory.d.ts.map +0 -1
  154. package/dist/factories/text-factory.js.map +0 -1
  155. package/dist/file-processor.d.ts.map +0 -1
  156. package/dist/file-processor.js.map +0 -1
  157. package/dist/index-manager.d.ts.map +0 -1
  158. package/dist/index-manager.js.map +0 -1
  159. package/dist/index.d.ts.map +0 -1
  160. package/dist/index.js.map +0 -1
  161. package/dist/indexer.d.ts.map +0 -1
  162. package/dist/indexer.js.map +0 -1
  163. package/dist/ingestion.d.ts.map +0 -1
  164. package/dist/ingestion.js.map +0 -1
  165. package/dist/mcp-server.d.ts.map +0 -1
  166. package/dist/mcp-server.js.map +0 -1
  167. package/dist/preprocess.d.ts.map +0 -1
  168. package/dist/preprocess.js.map +0 -1
  169. package/dist/preprocessors/index.d.ts.map +0 -1
  170. package/dist/preprocessors/index.js.map +0 -1
  171. package/dist/preprocessors/mdx.d.ts.map +0 -1
  172. package/dist/preprocessors/mdx.js.map +0 -1
  173. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  174. package/dist/preprocessors/mermaid.js.map +0 -1
  175. package/dist/preprocessors/registry.d.ts.map +0 -1
  176. package/dist/preprocessors/registry.js.map +0 -1
  177. package/dist/search-standalone.d.ts.map +0 -1
  178. package/dist/search-standalone.js.map +0 -1
  179. package/dist/search.d.ts.map +0 -1
  180. package/dist/search.js.map +0 -1
  181. package/dist/test-utils.d.ts.map +0 -1
  182. package/dist/test-utils.js.map +0 -1
  183. package/dist/text/chunker.d.ts.map +0 -1
  184. package/dist/text/chunker.js.map +0 -1
  185. package/dist/text/embedder.d.ts.map +0 -1
  186. package/dist/text/embedder.js.map +0 -1
  187. package/dist/text/index.d.ts.map +0 -1
  188. package/dist/text/index.js.map +0 -1
  189. package/dist/text/preprocessors/index.d.ts.map +0 -1
  190. package/dist/text/preprocessors/index.js.map +0 -1
  191. package/dist/text/preprocessors/mdx.d.ts.map +0 -1
  192. package/dist/text/preprocessors/mdx.js.map +0 -1
  193. package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
  194. package/dist/text/preprocessors/mermaid.js.map +0 -1
  195. package/dist/text/preprocessors/registry.d.ts.map +0 -1
  196. package/dist/text/preprocessors/registry.js.map +0 -1
  197. package/dist/text/reranker.d.ts.map +0 -1
  198. package/dist/text/reranker.js.map +0 -1
  199. package/dist/text/tokenizer.d.ts.map +0 -1
  200. package/dist/text/tokenizer.js.map +0 -1
  201. package/dist/types.d.ts.map +0 -1
  202. package/dist/types.js.map +0 -1
@@ -1,7 +1,110 @@
1
1
  import { existsSync, statSync } from 'fs';
2
2
  import { resolve } from 'path';
3
3
  import { TextIngestionFactory } from '../factories/text-factory.js';
4
+ import { withCLIDatabaseAccess, setupCLICleanup, isDatabaseBusy } from '../core/cli-database-utils.js';
4
5
  import { EXIT_CODES, ConfigurationError } from '../core/config.js';
6
/**
 * Validate mode-specific model and reranking-strategy combinations.
 *
 * Checks, in order:
 *   1. that the mode itself is one of the known modes ('text' or 'multimodal'),
 *   2. that the embedding model (if given) belongs to the selected mode,
 *   3. that the reranking strategy (if given) is valid for the selected mode.
 *
 * On success a short summary of the effective configuration is printed to stdout.
 *
 * @param {object} options - Factory options assembled by the CLI.
 * @param {string} [options.mode] - Processing mode; a missing/falsy value is treated as 'text'.
 * @param {string} [options.embeddingModel] - Embedding model name; skipped when falsy.
 * @param {string} [options.rerankingStrategy] - Reranking strategy name; skipped when falsy.
 * @returns {Promise<void>} Resolves when the configuration is acceptable.
 * @throws {ConfigurationError} With EXIT_CODES.INVALID_ARGUMENTS when the mode is
 *   unknown, or when the model or reranking strategy does not fit the mode.
 */
async function validateModeConfiguration(options) {
    const mode = options.mode || 'text';
    const model = options.embeddingModel;
    const rerankingStrategy = options.rerankingStrategy;
    // Define supported models for each mode
    const textModels = [
        'sentence-transformers/all-MiniLM-L6-v2',
        'Xenova/all-mpnet-base-v2'
    ];
    const multimodalModels = [
        'Xenova/clip-vit-base-patch32'
    ];
    // Reject unknown modes up front. Every check below is keyed on the mode being
    // exactly 'text' or 'multimodal', so without this guard an unrecognized mode
    // would skip all validation and still be reported as validated.
    const supportedModes = ['text', 'multimodal'];
    if (!supportedModes.includes(mode)) {
        throw new ConfigurationError(`Unsupported mode '${mode}'.\n` +
            `\n` +
            `Supported modes:\n` +
            ` ${supportedModes.join('\n ')}\n` +
            `\n` +
            `Examples:\n` +
            ` raglite ingest <path> --mode text\n` +
            ` raglite ingest <path> --mode multimodal\n`, EXIT_CODES.INVALID_ARGUMENTS);
    }
    // Validate model compatibility with mode
    if (model) {
        if (mode === 'text' && !textModels.includes(model)) {
            if (multimodalModels.includes(model)) {
                // Known model, wrong mode: point the user at the mode switch.
                throw new ConfigurationError(`Model '${model}' is a multimodal model but text mode was selected.\n` +
                    `\n` +
                    `To use this model, specify multimodal mode:\n` +
                    ` raglite ingest <path> --mode multimodal --model ${model}\n` +
                    `\n` +
                    `Or choose a text model for text mode:\n` +
                    ` ${textModels.map(m => `raglite ingest <path> --model ${m}`).join('\n ')}\n`, EXIT_CODES.INVALID_ARGUMENTS);
            }
            else {
                throw new ConfigurationError(`Model '${model}' is not supported for text mode.\n` +
                    `\n` +
                    `Supported models for text mode:\n` +
                    ` ${textModels.join('\n ')}\n` +
                    `\n` +
                    `Examples:\n` +
                    ` raglite ingest <path> --model sentence-transformers/all-MiniLM-L6-v2\n` +
                    ` raglite ingest <path> --model Xenova/all-mpnet-base-v2\n`, EXIT_CODES.INVALID_ARGUMENTS);
            }
        }
        if (mode === 'multimodal' && !multimodalModels.includes(model)) {
            if (textModels.includes(model)) {
                // Known model, wrong mode: point the user at the mode switch.
                throw new ConfigurationError(`Model '${model}' is a text-only model but multimodal mode was selected.\n` +
                    `\n` +
                    `To use this model, specify text mode:\n` +
                    ` raglite ingest <path> --mode text --model ${model}\n` +
                    `\n` +
                    `Or choose a multimodal model for multimodal mode:\n` +
                    ` ${multimodalModels.map(m => `raglite ingest <path> --mode multimodal --model ${m}`).join('\n ')}\n`, EXIT_CODES.INVALID_ARGUMENTS);
            }
            else {
                throw new ConfigurationError(`Model '${model}' is not supported for multimodal mode.\n` +
                    `\n` +
                    `Supported models for multimodal mode:\n` +
                    ` ${multimodalModels.join('\n ')}\n` +
                    `\n` +
                    `Example:\n` +
                    ` raglite ingest <path> --mode multimodal --model Xenova/clip-vit-base-patch32\n`, EXIT_CODES.INVALID_ARGUMENTS);
            }
        }
    }
    // Validate reranking strategy compatibility with mode
    if (rerankingStrategy) {
        const textStrategies = ['cross-encoder', 'disabled'];
        const multimodalStrategies = ['text-derived', 'metadata', 'disabled'];
        if (mode === 'text' && !textStrategies.includes(rerankingStrategy)) {
            throw new ConfigurationError(`Reranking strategy '${rerankingStrategy}' is not supported for text mode.\n` +
                `\n` +
                `Supported strategies for text mode:\n` +
                ` cross-encoder Use cross-encoder model for reranking (default)\n` +
                ` disabled No reranking, use vector similarity only\n` +
                `\n` +
                `Examples:\n` +
                ` raglite ingest <path> --mode text --rerank-strategy cross-encoder\n` +
                ` raglite ingest <path> --mode text --rerank-strategy disabled\n`, EXIT_CODES.INVALID_ARGUMENTS);
        }
        if (mode === 'multimodal' && !multimodalStrategies.includes(rerankingStrategy)) {
            throw new ConfigurationError(`Reranking strategy '${rerankingStrategy}' is not supported for multimodal mode.\n` +
                `\n` +
                `Supported strategies for multimodal mode:\n` +
                ` text-derived Convert images to text, then use cross-encoder (default)\n` +
                ` metadata Use filename and metadata-based scoring\n` +
                ` disabled No reranking, use vector similarity only\n` +
                `\n` +
                `Examples:\n` +
                ` raglite ingest <path> --mode multimodal --rerank-strategy text-derived\n` +
                ` raglite ingest <path> --mode multimodal --rerank-strategy metadata\n` +
                ` raglite ingest <path> --mode multimodal --rerank-strategy disabled\n`, EXIT_CODES.INVALID_ARGUMENTS);
        }
    }
    // Log the final configuration; the mode line is only printed for non-default modes
    console.log('✅ Mode configuration validated successfully');
    if (mode !== 'text') {
        console.log(` Mode: ${mode}`);
    }
    if (model) {
        console.log(` Model: ${model}`);
    }
    if (rerankingStrategy) {
        console.log(` Reranking: ${rerankingStrategy}`);
    }
}
5
108
  /**
6
109
  * Run document ingestion from CLI
7
110
  * @param path - File or directory path to ingest
@@ -9,6 +112,7 @@ import { EXIT_CODES, ConfigurationError } from '../core/config.js';
9
112
  */
10
113
  export async function runIngest(path, options = {}) {
11
114
  try {
115
+ // Handle --rebuild-if-needed flag immediately to prevent dimension mismatch error
12
116
  // Validate path exists
13
117
  const resolvedPath = resolve(path);
14
118
  if (!existsSync(resolvedPath)) {
@@ -64,15 +168,53 @@ export async function runIngest(path, options = {}) {
64
168
  factoryOptions.embeddingModel = options.model;
65
169
  console.log(`Using embedding model: ${options.model}`);
66
170
  }
171
+ if (options.mode) {
172
+ factoryOptions.mode = options.mode;
173
+ console.log(`Using processing mode: ${options.mode}`);
174
+ }
175
+ if (options['rerank-strategy']) {
176
+ factoryOptions.rerankingStrategy = options['rerank-strategy'];
177
+ console.log(`Using reranking strategy: ${options['rerank-strategy']}`);
178
+ }
67
179
  if (options.rebuildIfNeeded) {
68
180
  factoryOptions.forceRebuild = true;
69
181
  console.log('Force rebuild enabled due to rebuildIfNeeded option');
182
+ // Delete old index file immediately to prevent dimension mismatch errors
183
+ const indexPath = process.env.RAG_INDEX_FILE || './vector-index.bin';
184
+ const { existsSync, unlinkSync } = await import('fs');
185
+ if (existsSync(indexPath)) {
186
+ try {
187
+ unlinkSync(indexPath);
188
+ console.log('🗑️ Removed old index file to prevent dimension mismatch');
189
+ }
190
+ catch (error) {
191
+ console.warn(`⚠️ Could not remove old index file: ${error}`);
192
+ }
193
+ }
194
+ }
195
+ // Validate mode-specific model and strategy combinations
196
+ await validateModeConfiguration(factoryOptions);
197
+ const dbPath = process.env.RAG_DB_FILE || './db.sqlite';
198
+ const indexPath = process.env.RAG_INDEX_FILE || './vector-index.bin';
199
+ // Setup graceful cleanup
200
+ setupCLICleanup(dbPath);
201
+ // Check if database is busy before starting
202
+ const busyStatus = await isDatabaseBusy(dbPath);
203
+ if (busyStatus.isBusy) {
204
+ console.log('⚠️ Database appears to be in use by another process');
205
+ console.log(` Reason: ${busyStatus.reason}`);
206
+ console.log(' Attempting to proceed anyway...');
207
+ console.log('');
70
208
  }
71
209
  // Create ingestion pipeline using factory
72
210
  let pipeline;
73
211
  try {
74
- // Create ingestion pipeline using TextIngestionFactory
75
- pipeline = await TextIngestionFactory.create(process.env.RAG_DB_FILE || './db.sqlite', process.env.RAG_INDEX_FILE || './vector-index.bin', factoryOptions);
212
+ // Create ingestion pipeline using TextIngestionFactory with database protection
213
+ pipeline = await withCLIDatabaseAccess(dbPath, () => TextIngestionFactory.create(dbPath, indexPath, factoryOptions), {
214
+ commandName: 'Ingestion command',
215
+ showProgress: true,
216
+ maxWaitMs: 15000 // Longer timeout for ingestion
217
+ });
76
218
  const result = await pipeline.ingestPath(resolvedPath);
77
219
  // Display final results
78
220
  console.log('\n' + '='.repeat(50));
@@ -93,12 +235,26 @@ export async function runIngest(path, options = {}) {
93
235
  console.log(`Processing rate: ${chunksPerSecond} chunks/second`);
94
236
  }
95
237
  console.log('\nIngestion completed successfully!');
238
+ // Display mode-specific information
239
+ const mode = options.mode || 'text';
240
+ if (mode === 'multimodal') {
241
+ console.log('✨ Multimodal mode enabled - you can now search across text and image content');
242
+ }
96
243
  console.log('You can now search your documents using: raglite search "your query"');
244
+ console.log('');
245
+ console.log('💡 The search command will automatically detect and use the ingestion mode.');
97
246
  }
98
247
  finally {
99
248
  if (pipeline) {
100
249
  await pipeline.cleanup();
101
250
  }
251
+ // Ensure clean exit for CLI commands
252
+ const { DatabaseConnectionManager } = await import('../core/database-connection-manager.js');
253
+ await DatabaseConnectionManager.closeAllConnections();
254
+ // Force exit for CLI commands to prevent hanging
255
+ setTimeout(() => {
256
+ process.exit(0);
257
+ }, 100);
102
258
  }
103
259
  }
104
260
  catch (error) {
@@ -178,12 +334,37 @@ export async function runRebuild() {
178
334
  console.log('');
179
335
  console.log('Progress will be shown below...');
180
336
  console.log('');
337
+ // Detect mode from existing database for rebuild
338
+ const dbPath = process.env.RAG_DB_FILE || './db.sqlite';
339
+ const indexPath = process.env.RAG_INDEX_FILE || './vector-index.bin';
340
+ let rebuildOptions = { forceRebuild: true };
341
+ if (existsSync(dbPath)) {
342
+ try {
343
+ // Import mode detection service
344
+ const { ModeDetectionService } = await import('../core/mode-detection-service.js');
345
+ const modeService = new ModeDetectionService(dbPath);
346
+ const systemInfo = await modeService.detectMode();
347
+ console.log(`🎯 Detected existing configuration:`);
348
+ console.log(` Mode: ${systemInfo.mode}`);
349
+ console.log(` Model: ${systemInfo.modelName}`);
350
+ console.log(` Reranking: ${systemInfo.rerankingStrategy}`);
351
+ console.log('');
352
+ // Use the detected configuration for rebuild
353
+ rebuildOptions.mode = systemInfo.mode;
354
+ rebuildOptions.embeddingModel = systemInfo.modelName;
355
+ rebuildOptions.rerankingStrategy = systemInfo.rerankingStrategy;
356
+ }
357
+ catch (error) {
358
+ console.warn('⚠️ Could not detect existing mode configuration, using defaults');
359
+ console.warn(` Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
360
+ }
361
+ }
181
362
  // Create ingestion pipeline with force rebuild using factory
182
- const pipeline = await TextIngestionFactory.create(process.env.RAG_DB_FILE || './db.sqlite', process.env.RAG_INDEX_FILE || './vector-index.bin', { forceRebuild: true });
363
+ const pipeline = await TextIngestionFactory.create(dbPath, indexPath, rebuildOptions);
183
364
  try {
184
365
  // Get all documents from database and re-ingest them
185
366
  const { openDatabase } = await import('../core/db.js');
186
- const db = await openDatabase(process.env.RAG_DB_FILE || './db.sqlite');
367
+ const db = await openDatabase(dbPath);
187
368
  try {
188
369
  const documents = await db.all('SELECT DISTINCT source FROM documents ORDER BY source');
189
370
  if (documents.length === 0) {
@@ -240,6 +421,13 @@ export async function runRebuild() {
240
421
  }
241
422
  finally {
242
423
  await pipeline.cleanup();
424
+ // Ensure clean exit for CLI commands
425
+ const { DatabaseConnectionManager } = await import('../core/database-connection-manager.js');
426
+ await DatabaseConnectionManager.closeAllConnections();
427
+ // Force exit for CLI commands to prevent hanging
428
+ setTimeout(() => {
429
+ process.exit(0);
430
+ }, 100);
243
431
  }
244
432
  }
245
433
  catch (error) {
@@ -1,5 +1,6 @@
1
1
  import { existsSync } from 'fs';
2
- import { TextSearchFactory } from '../factories/text-factory.js';
2
+ import { PolymorphicSearchFactory } from '../core/polymorphic-search-factory.js';
3
+ import { withCLIDatabaseAccess, setupCLICleanup } from '../core/cli-database-utils.js';
3
4
  import { config, EXIT_CODES, ConfigurationError } from '../core/config.js';
4
5
  /**
5
6
  * Run search from CLI
@@ -54,15 +55,16 @@ export async function runSearch(query, options = {}) {
54
55
  }
55
56
  console.log(`Searching for: "${query}"`);
56
57
  console.log('');
57
- // Initialize search engine using factory
58
+ // Setup graceful cleanup
59
+ setupCLICleanup(effectiveConfig.db_file);
60
+ // Initialize search engine using polymorphic factory with database protection
58
61
  let searchEngine;
59
62
  try {
60
- // Prepare factory options
61
- const factoryOptions = {
62
- enableReranking: options.rerank !== undefined ? options.rerank : effectiveConfig.rerank_enabled
63
- };
64
- // Create search engine using TextSearchFactory
65
- searchEngine = await TextSearchFactory.create(effectiveConfig.index_file, effectiveConfig.db_file, factoryOptions);
63
+ // Create search engine using PolymorphicSearchFactory (auto-detects mode)
64
+ searchEngine = await withCLIDatabaseAccess(effectiveConfig.db_file, () => PolymorphicSearchFactory.create(effectiveConfig.index_file, effectiveConfig.db_file), {
65
+ commandName: 'Search command',
66
+ showProgress: true
67
+ });
66
68
  // Prepare search options
67
69
  const searchOptions = {};
68
70
  if (options['top-k'] !== undefined) {
@@ -73,8 +75,18 @@ export async function runSearch(query, options = {}) {
73
75
  }
74
76
  // Perform search
75
77
  const startTime = Date.now();
76
- const results = await searchEngine.search(query, searchOptions);
78
+ let results = await searchEngine.search(query, searchOptions);
77
79
  const searchTime = Date.now() - startTime;
80
+ // Apply content type filter if specified
81
+ const contentTypeFilter = options['content-type'];
82
+ if (contentTypeFilter && contentTypeFilter !== 'all') {
83
+ const originalCount = results.length;
84
+ results = results.filter(r => r.contentType === contentTypeFilter);
85
+ if (results.length < originalCount) {
86
+ console.log(`Filtered to ${results.length} ${contentTypeFilter} result${results.length === 1 ? '' : 's'} (from ${originalCount} total)`);
87
+ console.log('');
88
+ }
89
+ }
78
90
  // Display results
79
91
  if (results.length === 0) {
80
92
  console.log('No results found.');
@@ -86,10 +98,30 @@ export async function runSearch(query, options = {}) {
86
98
  else {
87
99
  console.log(`Found ${results.length} result${results.length === 1 ? '' : 's'} in ${searchTime}ms:\n`);
88
100
  results.forEach((result, index) => {
89
- console.log(`${index + 1}. ${result.document.title}`);
101
+ // Add content type icon for visual distinction
102
+ const contentTypeIcon = result.contentType === 'image' ? '🖼️ ' : '📄 ';
103
+ const contentTypeLabel = result.contentType === 'image' ? '[IMAGE]' : '[TEXT]';
104
+ console.log(`${index + 1}. ${contentTypeIcon}${result.document.title}`);
90
105
  console.log(` Source: ${result.document.source}`);
106
+ console.log(` Type: ${contentTypeLabel}`);
91
107
  console.log(` Score: ${(result.score * 100).toFixed(1)}%`);
92
- console.log(` Text: ${truncateText(result.content, 200)}`);
108
+ // Display content differently based on type
109
+ if (result.contentType === 'image') {
110
+ // For images, show metadata if available
111
+ if (result.metadata?.description) {
112
+ console.log(` Description: ${truncateText(result.metadata.description, 200)}`);
113
+ }
114
+ if (result.metadata?.dimensions) {
115
+ console.log(` Dimensions: ${result.metadata.dimensions}`);
116
+ }
117
+ if (result.metadata?.format) {
118
+ console.log(` Format: ${result.metadata.format}`);
119
+ }
120
+ }
121
+ else {
122
+ // For text, show content preview
123
+ console.log(` Text: ${truncateText(result.content, 200)}`);
124
+ }
93
125
  console.log('');
94
126
  });
95
127
  // Show search statistics
@@ -107,6 +139,13 @@ export async function runSearch(query, options = {}) {
107
139
  if (searchEngine) {
108
140
  await searchEngine.cleanup();
109
141
  }
142
+ // Ensure clean exit for CLI commands
143
+ const { DatabaseConnectionManager } = await import('../core/database-connection-manager.js');
144
+ await DatabaseConnectionManager.closeAllConnections();
145
+ // Force exit for CLI commands to prevent hanging
146
+ setTimeout(() => {
147
+ process.exit(0);
148
+ }, 100);
110
149
  }
111
150
  }
112
151
  catch (error) {
package/dist/cli.js CHANGED
@@ -29,26 +29,40 @@ Examples:
29
29
  raglite ingest ./docs/ # Ingest all .md/.txt files in docs/
30
30
  raglite ingest ./readme.md # Ingest single file
31
31
  raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2 # Use higher quality model
32
+ raglite ingest ./docs/ --mode multimodal # Enable multimodal processing
33
+ raglite ingest ./docs/ --mode multimodal --rerank-strategy metadata # Use metadata reranking
32
34
  raglite ingest ./docs/ --path-strategy relative --path-base /project # Use relative paths
33
35
  raglite search "machine learning" # Search for documents about machine learning
34
36
  raglite search "API documentation" --top-k 10 # Get top 10 results
37
+ raglite search "red car" --content-type image # Search only image results
35
38
 
36
39
  raglite rebuild # Rebuild the entire index
37
40
 
38
41
  Options for search:
39
- --top-k <number> Number of results to return (default: 10)
40
- --rerank Enable reranking for better results
41
- --no-rerank Disable reranking
42
+ --top-k <number> Number of results to return (default: 10)
43
+ --rerank Enable reranking for better results
44
+ --no-rerank Disable reranking
45
+ --content-type <type> Filter results by content type: 'text', 'image', or 'all' (default: all)
42
46
 
43
47
  Options for ingest:
44
48
  --model <name> Use specific embedding model
49
+ --mode <mode> Processing mode: 'text' (default) or 'multimodal'
50
+ --rerank-strategy <strategy> Reranking strategy for multimodal mode
45
51
  --rebuild-if-needed Automatically rebuild if model mismatch detected (WARNING: rebuilds entire index)
46
52
  --path-strategy <strategy> Path storage strategy: 'relative' (default) or 'absolute'
47
53
  --path-base <path> Base directory for relative paths (defaults to current directory)
48
54
 
49
55
  Available models:
50
- sentence-transformers/all-MiniLM-L6-v2 (384 dim, fast, default)
51
- Xenova/all-mpnet-base-v2 (768 dim, higher quality)
56
+ Text mode:
57
+ sentence-transformers/all-MiniLM-L6-v2 (384 dim, fast, default)
58
+ Xenova/all-mpnet-base-v2 (768 dim, higher quality)
59
+ Multimodal mode:
60
+ Xenova/clip-vit-base-patch32 (512 dim, text + image support)
61
+
62
+ Available reranking strategies (multimodal mode):
63
+ text-derived Use image-to-text conversion + cross-encoder (default)
64
+ metadata Use filename and metadata-based scoring
65
+ disabled No reranking, use vector similarity only
52
66
 
53
67
  For more information, visit: https://github.com/your-repo/rag-lite-ts
54
68
  `);
@@ -115,9 +129,13 @@ function validateArgs(command, args, options) {
115
129
  console.error(' raglite ingest ./docs/ # Ingest all .md/.txt files in docs/');
116
130
  console.error(' raglite ingest ./readme.md # Ingest single file');
117
131
  console.error(' raglite ingest ./docs/ --model Xenova/all-mpnet-base-v2 # Use higher quality model');
132
+ console.error(' raglite ingest ./docs/ --mode multimodal # Enable multimodal processing');
133
+ console.error(' raglite ingest ./docs/ --mode multimodal --rerank-strategy metadata # Use metadata reranking');
118
134
  console.error('');
119
135
  console.error('Options:');
120
136
  console.error(' --model <name> Use specific embedding model');
137
+ console.error(' --mode <mode> Processing mode: text (default) or multimodal');
138
+ console.error(' --rerank-strategy <strategy> Reranking strategy for multimodal mode');
121
139
  console.error(' --rebuild-if-needed Automatically rebuild if model mismatch detected');
122
140
  console.error('');
123
141
  console.error('The path can be either a file (.md or .txt) or a directory.');
@@ -134,11 +152,13 @@ function validateArgs(command, args, options) {
134
152
  console.error(' raglite search "machine learning"');
135
153
  console.error(' raglite search "API documentation" --top-k 10');
136
154
  console.error(' raglite search "tutorial" --rerank');
155
+ console.error(' raglite search "red car" --content-type image');
137
156
  console.error('');
138
157
  console.error('Options:');
139
- console.error(' --top-k <number> Number of results to return (default: 10)');
140
- console.error(' --rerank Enable reranking for better results');
141
- console.error(' --no-rerank Disable reranking');
158
+ console.error(' --top-k <number> Number of results to return (default: 10)');
159
+ console.error(' --rerank Enable reranking for better results');
160
+ console.error(' --no-rerank Disable reranking');
161
+ console.error(' --content-type <type> Filter by content type: text, image, or all (default: all)');
142
162
  process.exit(EXIT_CODES.INVALID_ARGUMENTS);
143
163
  }
144
164
  break;
@@ -173,6 +193,96 @@ function validateArgs(command, args, options) {
173
193
  }
174
194
  options['top-k'] = topK;
175
195
  }
196
+ // Validate content-type option (only for search command)
197
+ if (options['content-type'] !== undefined) {
198
+ if (command !== 'search') {
199
+ console.error(`Error: --content-type option is only available for the 'search' command`);
200
+ console.error('');
201
+ console.error('Use this option to filter search results by content type.');
202
+ console.error('');
203
+ console.error('Examples:');
204
+ console.error(' raglite search "query" --content-type text # Only text results');
205
+ console.error(' raglite search "query" --content-type image # Only image results');
206
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
207
+ }
208
+ const supportedTypes = ['text', 'image', 'all'];
209
+ if (!supportedTypes.includes(options['content-type'])) {
210
+ console.error(`Error: Unsupported content type '${options['content-type']}'`);
211
+ console.error('');
212
+ console.error('Supported content types:');
213
+ console.error(' text Filter to show only text results');
214
+ console.error(' image Filter to show only image results');
215
+ console.error(' all Show all results (default)');
216
+ console.error('');
217
+ console.error('Examples:');
218
+ console.error(' --content-type text');
219
+ console.error(' --content-type image');
220
+ console.error(' --content-type all');
221
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
222
+ }
223
+ }
224
+ // Validate mode option (only for ingest command)
225
+ if (options.mode !== undefined) {
226
+ if (command !== 'ingest') {
227
+ console.error(`Error: --mode option is only available for the 'ingest' command`);
228
+ console.error('');
229
+ console.error('The search command automatically detects the mode from the database.');
230
+ console.error('Mode is set once during ingestion and persists for all searches.');
231
+ console.error('');
232
+ console.error('Examples:');
233
+ console.error(' raglite ingest ./docs/ --mode multimodal');
234
+ console.error(' raglite search "your query" # Uses mode from ingestion');
235
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
236
+ }
237
+ const supportedModes = ['text', 'multimodal'];
238
+ if (!supportedModes.includes(options.mode)) {
239
+ console.error(`Error: Unsupported mode '${options.mode}'`);
240
+ console.error('');
241
+ console.error('Supported modes:');
242
+ console.error(' text Process text documents only (default)');
243
+ console.error(' multimodal Process text and image documents');
244
+ console.error('');
245
+ console.error('Examples:');
246
+ console.error(' --mode text');
247
+ console.error(' --mode multimodal');
248
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
249
+ }
250
+ }
251
+ // Validate rerank-strategy option (only for ingest command with multimodal mode)
252
+ if (options['rerank-strategy'] !== undefined) {
253
+ if (command !== 'ingest') {
254
+ console.error(`Error: --rerank-strategy option is only available for the 'ingest' command`);
255
+ console.error('');
256
+ console.error('Reranking strategy is configured during ingestion and used automatically during search.');
257
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
258
+ }
259
+ const mode = options.mode || 'text';
260
+ if (mode !== 'multimodal') {
261
+ console.error(`Error: --rerank-strategy option is only available in multimodal mode`);
262
+ console.error('');
263
+ console.error('To use reranking strategies, specify --mode multimodal');
264
+ console.error('');
265
+ console.error('Examples:');
266
+ console.error(' raglite ingest ./docs/ --mode multimodal --rerank-strategy text-derived');
267
+ console.error(' raglite ingest ./docs/ --mode multimodal --rerank-strategy metadata');
268
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
269
+ }
270
+ const supportedStrategies = ['text-derived', 'metadata', 'disabled'];
271
+ if (!supportedStrategies.includes(options['rerank-strategy'])) {
272
+ console.error(`Error: Unsupported reranking strategy '${options['rerank-strategy']}'`);
273
+ console.error('');
274
+ console.error('Supported strategies for multimodal mode:');
275
+ console.error(' text-derived Convert images to text, then use cross-encoder (default)');
276
+ console.error(' metadata Use filename and metadata-based scoring');
277
+ console.error(' disabled No reranking, use vector similarity only');
278
+ console.error('');
279
+ console.error('Examples:');
280
+ console.error(' --rerank-strategy text-derived');
281
+ console.error(' --rerank-strategy metadata');
282
+ console.error(' --rerank-strategy disabled');
283
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
284
+ }
285
+ }
176
286
  // Validate model option (only for ingest command)
177
287
  if (options.model !== undefined) {
178
288
  if (command !== 'ingest') {
@@ -186,20 +296,45 @@ function validateArgs(command, args, options) {
186
296
  console.error(' raglite search "your query" # Uses the model from ingestion');
187
297
  process.exit(EXIT_CODES.INVALID_ARGUMENTS);
188
298
  }
189
- const supportedModels = [
299
+ const mode = options.mode || 'text';
300
+ const textModels = [
190
301
  'sentence-transformers/all-MiniLM-L6-v2',
191
302
  'Xenova/all-mpnet-base-v2'
192
303
  ];
304
+ const multimodalModels = [
305
+ 'Xenova/clip-vit-base-patch32'
306
+ ];
307
+ let supportedModels;
308
+ let modelTypeDescription;
309
+ if (mode === 'multimodal') {
310
+ supportedModels = multimodalModels;
311
+ modelTypeDescription = 'multimodal models';
312
+ }
313
+ else {
314
+ supportedModels = textModels;
315
+ modelTypeDescription = 'text models';
316
+ }
193
317
  if (!supportedModels.includes(options.model)) {
194
- console.error(`Error: Unsupported model '${options.model}'`);
318
+ console.error(`Error: Model '${options.model}' is not supported for ${mode} mode`);
195
319
  console.error('');
196
- console.error('Supported models:');
197
- console.error(' sentence-transformers/all-MiniLM-L6-v2 (384 dim, fast, default)');
198
- console.error(' Xenova/all-mpnet-base-v2 (768 dim, higher quality)');
320
+ if (mode === 'text') {
321
+ console.error('Supported models for text mode:');
322
+ console.error(' sentence-transformers/all-MiniLM-L6-v2 (384 dim, fast, default)');
323
+ console.error(' Xenova/all-mpnet-base-v2 (768 dim, higher quality)');
324
+ }
325
+ else {
326
+ console.error('Supported models for multimodal mode:');
327
+ console.error(' Xenova/clip-vit-base-patch32 (512 dim, text + image support)');
328
+ }
199
329
  console.error('');
200
330
  console.error('Examples:');
201
- console.error(' --model sentence-transformers/all-MiniLM-L6-v2');
202
- console.error(' --model Xenova/all-mpnet-base-v2');
331
+ if (mode === 'text') {
332
+ console.error(' --model sentence-transformers/all-MiniLM-L6-v2');
333
+ console.error(' --model Xenova/all-mpnet-base-v2');
334
+ }
335
+ else {
336
+ console.error(' --model Xenova/clip-vit-base-patch32 --mode multimodal');
337
+ }
203
338
  process.exit(EXIT_CODES.INVALID_ARGUMENTS);
204
339
  }
205
340
  }
@@ -243,6 +378,8 @@ function validateArgs(command, args, options) {
243
378
  * Main CLI entry point
244
379
  */
245
380
  async function main() {
381
+ // Set CLI mode to prevent database connection manager from starting timers
382
+ process.env.RAG_CLI_MODE = 'true';
246
383
  try {
247
384
  const { command, args, options } = parseArgs();
248
385
  // Validate arguments
@@ -340,23 +477,43 @@ process.on('uncaughtException', (error) => {
340
477
  process.exit(EXIT_CODES.GENERAL_ERROR);
341
478
  });
342
479
  // Handle process termination signals gracefully
343
- process.on('SIGINT', () => {
480
+ process.on('SIGINT', async () => {
344
481
  console.log('\n\nReceived SIGINT (Ctrl+C). Shutting down gracefully...');
345
482
  console.log('If you need to force quit, press Ctrl+C again.');
483
+ // Clean up database connections before exit
484
+ try {
485
+ const { DatabaseConnectionManager } = await import('./core/database-connection-manager.js');
486
+ await DatabaseConnectionManager.closeAllConnections();
487
+ }
488
+ catch (error) {
489
+ // Ignore cleanup errors during shutdown
490
+ }
346
491
  process.exit(EXIT_CODES.SUCCESS);
347
492
  });
348
- process.on('SIGTERM', () => {
493
+ process.on('SIGTERM', async () => {
349
494
  console.log('\n\nReceived SIGTERM. Shutting down gracefully...');
350
- process.exit(EXIT_CODES.SUCCESS);
351
- });
352
- // Run the CLI
353
- main().catch((error) => {
354
- console.error('Fatal error:', error instanceof Error ? error.message : String(error));
355
- if (error instanceof ConfigurationError) {
356
- process.exit(error.exitCode);
495
+ // Clean up database connections before exit
496
+ try {
497
+ const { DatabaseConnectionManager } = await import('./core/database-connection-manager.js');
498
+ await DatabaseConnectionManager.closeAllConnections();
357
499
  }
358
- else {
359
- process.exit(EXIT_CODES.GENERAL_ERROR);
500
+ catch (error) {
501
+ // Ignore cleanup errors during shutdown
360
502
  }
503
+ process.exit(EXIT_CODES.SUCCESS);
361
504
  });
505
+ // Run the CLI only if this file is executed directly
506
+ // In ES modules, we need to check import.meta.url instead of require.main
507
+ // Check if this file is being run directly
508
+ if (process.argv[1] === __filename || process.argv[1].endsWith('cli.js')) {
509
+ main().catch((error) => {
510
+ console.error('Fatal error:', error instanceof Error ? error.message : String(error));
511
+ if (error instanceof ConfigurationError) {
512
+ process.exit(error.exitCode);
513
+ }
514
+ else {
515
+ process.exit(EXIT_CODES.GENERAL_ERROR);
516
+ }
517
+ });
518
+ }
362
519
  //# sourceMappingURL=cli.js.map