rag-lite-ts 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. package/README.md +651 -109
  2. package/dist/cli/indexer.js +262 -46
  3. package/dist/cli/search.js +54 -32
  4. package/dist/cli.js +185 -28
  5. package/dist/config.d.ts +34 -73
  6. package/dist/config.js +50 -255
  7. package/dist/core/abstract-embedder.d.ts +125 -0
  8. package/dist/core/abstract-embedder.js +264 -0
  9. package/dist/core/actionable-error-messages.d.ts +60 -0
  10. package/dist/core/actionable-error-messages.js +397 -0
  11. package/dist/core/adapters.d.ts +93 -0
  12. package/dist/core/adapters.js +139 -0
  13. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  14. package/dist/core/batch-processing-optimizer.js +541 -0
  15. package/dist/core/chunker.d.ts +119 -0
  16. package/dist/core/chunker.js +73 -0
  17. package/dist/core/cli-database-utils.d.ts +53 -0
  18. package/dist/core/cli-database-utils.js +239 -0
  19. package/dist/core/config.d.ts +102 -0
  20. package/dist/core/config.js +247 -0
  21. package/dist/core/content-errors.d.ts +111 -0
  22. package/dist/core/content-errors.js +362 -0
  23. package/dist/core/content-manager.d.ts +343 -0
  24. package/dist/core/content-manager.js +1504 -0
  25. package/dist/core/content-performance-optimizer.d.ts +150 -0
  26. package/dist/core/content-performance-optimizer.js +516 -0
  27. package/dist/core/content-resolver.d.ts +104 -0
  28. package/dist/core/content-resolver.js +285 -0
  29. package/dist/core/cross-modal-search.d.ts +164 -0
  30. package/dist/core/cross-modal-search.js +342 -0
  31. package/dist/core/database-connection-manager.d.ts +109 -0
  32. package/dist/core/database-connection-manager.js +304 -0
  33. package/dist/core/db.d.ts +245 -0
  34. package/dist/core/db.js +952 -0
  35. package/dist/core/embedder-factory.d.ts +176 -0
  36. package/dist/core/embedder-factory.js +338 -0
  37. package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
  38. package/dist/{error-handler.js → core/error-handler.js} +51 -8
  39. package/dist/core/index.d.ts +59 -0
  40. package/dist/core/index.js +69 -0
  41. package/dist/core/ingestion.d.ts +213 -0
  42. package/dist/core/ingestion.js +812 -0
  43. package/dist/core/interfaces.d.ts +408 -0
  44. package/dist/core/interfaces.js +106 -0
  45. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  46. package/dist/core/lazy-dependency-loader.js +453 -0
  47. package/dist/core/mode-detection-service.d.ts +150 -0
  48. package/dist/core/mode-detection-service.js +565 -0
  49. package/dist/core/mode-model-validator.d.ts +92 -0
  50. package/dist/core/mode-model-validator.js +203 -0
  51. package/dist/core/model-registry.d.ts +120 -0
  52. package/dist/core/model-registry.js +415 -0
  53. package/dist/core/model-validator.d.ts +217 -0
  54. package/dist/core/model-validator.js +782 -0
  55. package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
  56. package/dist/{path-manager.js → core/path-manager.js} +5 -0
  57. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  58. package/dist/core/polymorphic-search-factory.js +344 -0
  59. package/dist/core/raglite-paths.d.ts +121 -0
  60. package/dist/core/raglite-paths.js +145 -0
  61. package/dist/core/reranking-config.d.ts +42 -0
  62. package/dist/core/reranking-config.js +156 -0
  63. package/dist/core/reranking-factory.d.ts +92 -0
  64. package/dist/core/reranking-factory.js +591 -0
  65. package/dist/core/reranking-strategies.d.ts +325 -0
  66. package/dist/core/reranking-strategies.js +720 -0
  67. package/dist/core/resource-cleanup.d.ts +163 -0
  68. package/dist/core/resource-cleanup.js +371 -0
  69. package/dist/core/resource-manager.d.ts +212 -0
  70. package/dist/core/resource-manager.js +564 -0
  71. package/dist/core/search-pipeline.d.ts +111 -0
  72. package/dist/core/search-pipeline.js +287 -0
  73. package/dist/core/search.d.ts +131 -0
  74. package/dist/core/search.js +296 -0
  75. package/dist/core/streaming-operations.d.ts +145 -0
  76. package/dist/core/streaming-operations.js +409 -0
  77. package/dist/core/types.d.ts +66 -0
  78. package/dist/core/types.js +6 -0
  79. package/dist/core/universal-embedder.d.ts +177 -0
  80. package/dist/core/universal-embedder.js +139 -0
  81. package/dist/core/validation-messages.d.ts +99 -0
  82. package/dist/core/validation-messages.js +334 -0
  83. package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
  84. package/dist/{vector-index.js → core/vector-index.js} +21 -3
  85. package/dist/dom-polyfills.d.ts +6 -0
  86. package/dist/dom-polyfills.js +40 -0
  87. package/dist/factories/index.d.ts +43 -0
  88. package/dist/factories/index.js +44 -0
  89. package/dist/factories/text-factory.d.ts +560 -0
  90. package/dist/factories/text-factory.js +968 -0
  91. package/dist/file-processor.d.ts +90 -4
  92. package/dist/file-processor.js +723 -20
  93. package/dist/index-manager.d.ts +3 -2
  94. package/dist/index-manager.js +13 -11
  95. package/dist/index.d.ts +72 -8
  96. package/dist/index.js +102 -16
  97. package/dist/indexer.js +1 -1
  98. package/dist/ingestion.d.ts +44 -154
  99. package/dist/ingestion.js +75 -671
  100. package/dist/mcp-server.d.ts +35 -3
  101. package/dist/mcp-server.js +1186 -79
  102. package/dist/multimodal/clip-embedder.d.ts +314 -0
  103. package/dist/multimodal/clip-embedder.js +945 -0
  104. package/dist/multimodal/index.d.ts +6 -0
  105. package/dist/multimodal/index.js +6 -0
  106. package/dist/preprocess.js +1 -1
  107. package/dist/run-error-recovery-tests.d.ts +7 -0
  108. package/dist/run-error-recovery-tests.js +101 -0
  109. package/dist/search-standalone.js +1 -1
  110. package/dist/search.d.ts +51 -69
  111. package/dist/search.js +117 -412
  112. package/dist/test-utils.d.ts +8 -26
  113. package/dist/text/chunker.d.ts +33 -0
  114. package/dist/{chunker.js → text/chunker.js} +98 -75
  115. package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
  116. package/dist/{embedder.js → text/embedder.js} +84 -10
  117. package/dist/text/index.d.ts +8 -0
  118. package/dist/text/index.js +9 -0
  119. package/dist/text/preprocessors/index.d.ts +17 -0
  120. package/dist/text/preprocessors/index.js +38 -0
  121. package/dist/text/preprocessors/mdx.d.ts +25 -0
  122. package/dist/text/preprocessors/mdx.js +101 -0
  123. package/dist/text/preprocessors/mermaid.d.ts +68 -0
  124. package/dist/text/preprocessors/mermaid.js +330 -0
  125. package/dist/text/preprocessors/registry.d.ts +56 -0
  126. package/dist/text/preprocessors/registry.js +180 -0
  127. package/dist/text/reranker.d.ts +59 -0
  128. package/dist/{reranker.js → text/reranker.js} +138 -53
  129. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  130. package/dist/text/sentence-transformer-embedder.js +340 -0
  131. package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
  132. package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
  133. package/dist/types.d.ts +40 -1
  134. package/dist/utils/vector-math.d.ts +31 -0
  135. package/dist/utils/vector-math.js +70 -0
  136. package/package.json +16 -4
  137. package/dist/api-errors.d.ts.map +0 -1
  138. package/dist/api-errors.js.map +0 -1
  139. package/dist/chunker.d.ts +0 -47
  140. package/dist/chunker.d.ts.map +0 -1
  141. package/dist/chunker.js.map +0 -1
  142. package/dist/cli/indexer.d.ts.map +0 -1
  143. package/dist/cli/indexer.js.map +0 -1
  144. package/dist/cli/search.d.ts.map +0 -1
  145. package/dist/cli/search.js.map +0 -1
  146. package/dist/cli.d.ts.map +0 -1
  147. package/dist/cli.js.map +0 -1
  148. package/dist/config.d.ts.map +0 -1
  149. package/dist/config.js.map +0 -1
  150. package/dist/db.d.ts +0 -90
  151. package/dist/db.d.ts.map +0 -1
  152. package/dist/db.js +0 -340
  153. package/dist/db.js.map +0 -1
  154. package/dist/embedder.d.ts.map +0 -1
  155. package/dist/embedder.js.map +0 -1
  156. package/dist/error-handler.d.ts.map +0 -1
  157. package/dist/error-handler.js.map +0 -1
  158. package/dist/file-processor.d.ts.map +0 -1
  159. package/dist/file-processor.js.map +0 -1
  160. package/dist/index-manager.d.ts.map +0 -1
  161. package/dist/index-manager.js.map +0 -1
  162. package/dist/index.d.ts.map +0 -1
  163. package/dist/index.js.map +0 -1
  164. package/dist/indexer.d.ts.map +0 -1
  165. package/dist/indexer.js.map +0 -1
  166. package/dist/ingestion.d.ts.map +0 -1
  167. package/dist/ingestion.js.map +0 -1
  168. package/dist/mcp-server.d.ts.map +0 -1
  169. package/dist/mcp-server.js.map +0 -1
  170. package/dist/path-manager.d.ts.map +0 -1
  171. package/dist/path-manager.js.map +0 -1
  172. package/dist/preprocess.d.ts.map +0 -1
  173. package/dist/preprocess.js.map +0 -1
  174. package/dist/preprocessors/index.d.ts.map +0 -1
  175. package/dist/preprocessors/index.js.map +0 -1
  176. package/dist/preprocessors/mdx.d.ts.map +0 -1
  177. package/dist/preprocessors/mdx.js.map +0 -1
  178. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  179. package/dist/preprocessors/mermaid.js.map +0 -1
  180. package/dist/preprocessors/registry.d.ts.map +0 -1
  181. package/dist/preprocessors/registry.js.map +0 -1
  182. package/dist/reranker.d.ts +0 -40
  183. package/dist/reranker.d.ts.map +0 -1
  184. package/dist/reranker.js.map +0 -1
  185. package/dist/resource-manager-demo.d.ts +0 -7
  186. package/dist/resource-manager-demo.d.ts.map +0 -1
  187. package/dist/resource-manager-demo.js +0 -52
  188. package/dist/resource-manager-demo.js.map +0 -1
  189. package/dist/resource-manager.d.ts +0 -129
  190. package/dist/resource-manager.d.ts.map +0 -1
  191. package/dist/resource-manager.js +0 -389
  192. package/dist/resource-manager.js.map +0 -1
  193. package/dist/search-standalone.d.ts.map +0 -1
  194. package/dist/search-standalone.js.map +0 -1
  195. package/dist/search.d.ts.map +0 -1
  196. package/dist/search.js.map +0 -1
  197. package/dist/test-utils.d.ts.map +0 -1
  198. package/dist/test-utils.js.map +0 -1
  199. package/dist/tokenizer.d.ts.map +0 -1
  200. package/dist/tokenizer.js.map +0 -1
  201. package/dist/types.d.ts.map +0 -1
  202. package/dist/types.js.map +0 -1
  203. package/dist/vector-index.d.ts.map +0 -1
  204. package/dist/vector-index.js.map +0 -1
@@ -1,7 +1,110 @@
1
1
  import { existsSync, statSync } from 'fs';
2
2
  import { resolve } from 'path';
3
- import { IngestionPipeline, rebuildIndex } from '../ingestion.js';
4
- import { EXIT_CODES, ConfigurationError } from '../config.js';
3
+ import { TextIngestionFactory } from '../factories/text-factory.js';
4
+ import { withCLIDatabaseAccess, setupCLICleanup, isDatabaseBusy } from '../core/cli-database-utils.js';
5
+ import { EXIT_CODES, ConfigurationError } from '../core/config.js';
6
+ /**
7
+ * Validate mode-specific model and strategy combinations
8
+ * Ensures that the selected model is compatible with the chosen mode
9
+ * and that reranking strategies are valid for the mode
10
+ */
11
+ async function validateModeConfiguration(options) {
12
+ const mode = options.mode || 'text';
13
+ const model = options.embeddingModel;
14
+ const rerankingStrategy = options.rerankingStrategy;
15
+ // Define supported models for each mode
16
+ const textModels = [
17
+ 'sentence-transformers/all-MiniLM-L6-v2',
18
+ 'Xenova/all-mpnet-base-v2'
19
+ ];
20
+ const multimodalModels = [
21
+ 'Xenova/clip-vit-base-patch32'
22
+ ];
23
+ // Validate model compatibility with mode
24
+ if (model) {
25
+ if (mode === 'text' && !textModels.includes(model)) {
26
+ if (multimodalModels.includes(model)) {
27
+ throw new ConfigurationError(`Model '${model}' is a multimodal model but text mode was selected.\n` +
28
+ `\n` +
29
+ `To use this model, specify multimodal mode:\n` +
30
+ ` raglite ingest <path> --mode multimodal --model ${model}\n` +
31
+ `\n` +
32
+ `Or choose a text model for text mode:\n` +
33
+ ` ${textModels.map(m => `raglite ingest <path> --model ${m}`).join('\n ')}\n`, EXIT_CODES.INVALID_ARGUMENTS);
34
+ }
35
+ else {
36
+ throw new ConfigurationError(`Model '${model}' is not supported for text mode.\n` +
37
+ `\n` +
38
+ `Supported models for text mode:\n` +
39
+ ` ${textModels.join('\n ')}\n` +
40
+ `\n` +
41
+ `Examples:\n` +
42
+ ` raglite ingest <path> --model sentence-transformers/all-MiniLM-L6-v2\n` +
43
+ ` raglite ingest <path> --model Xenova/all-mpnet-base-v2\n`, EXIT_CODES.INVALID_ARGUMENTS);
44
+ }
45
+ }
46
+ if (mode === 'multimodal' && !multimodalModels.includes(model)) {
47
+ if (textModels.includes(model)) {
48
+ throw new ConfigurationError(`Model '${model}' is a text-only model but multimodal mode was selected.\n` +
49
+ `\n` +
50
+ `To use this model, specify text mode:\n` +
51
+ ` raglite ingest <path> --mode text --model ${model}\n` +
52
+ `\n` +
53
+ `Or choose a multimodal model for multimodal mode:\n` +
54
+ ` ${multimodalModels.map(m => `raglite ingest <path> --mode multimodal --model ${m}`).join('\n ')}\n`, EXIT_CODES.INVALID_ARGUMENTS);
55
+ }
56
+ else {
57
+ throw new ConfigurationError(`Model '${model}' is not supported for multimodal mode.\n` +
58
+ `\n` +
59
+ `Supported models for multimodal mode:\n` +
60
+ ` ${multimodalModels.join('\n ')}\n` +
61
+ `\n` +
62
+ `Example:\n` +
63
+ ` raglite ingest <path> --mode multimodal --model Xenova/clip-vit-base-patch32\n`, EXIT_CODES.INVALID_ARGUMENTS);
64
+ }
65
+ }
66
+ }
67
+ // Validate reranking strategy compatibility with mode
68
+ if (rerankingStrategy) {
69
+ const textStrategies = ['cross-encoder', 'disabled'];
70
+ const multimodalStrategies = ['text-derived', 'metadata', 'disabled'];
71
+ if (mode === 'text' && !textStrategies.includes(rerankingStrategy)) {
72
+ throw new ConfigurationError(`Reranking strategy '${rerankingStrategy}' is not supported for text mode.\n` +
73
+ `\n` +
74
+ `Supported strategies for text mode:\n` +
75
+ ` cross-encoder Use cross-encoder model for reranking (default)\n` +
76
+ ` disabled No reranking, use vector similarity only\n` +
77
+ `\n` +
78
+ `Examples:\n` +
79
+ ` raglite ingest <path> --mode text --rerank-strategy cross-encoder\n` +
80
+ ` raglite ingest <path> --mode text --rerank-strategy disabled\n`, EXIT_CODES.INVALID_ARGUMENTS);
81
+ }
82
+ if (mode === 'multimodal' && !multimodalStrategies.includes(rerankingStrategy)) {
83
+ throw new ConfigurationError(`Reranking strategy '${rerankingStrategy}' is not supported for multimodal mode.\n` +
84
+ `\n` +
85
+ `Supported strategies for multimodal mode:\n` +
86
+ ` text-derived Convert images to text, then use cross-encoder (default)\n` +
87
+ ` metadata Use filename and metadata-based scoring\n` +
88
+ ` disabled No reranking, use vector similarity only\n` +
89
+ `\n` +
90
+ `Examples:\n` +
91
+ ` raglite ingest <path> --mode multimodal --rerank-strategy text-derived\n` +
92
+ ` raglite ingest <path> --mode multimodal --rerank-strategy metadata\n` +
93
+ ` raglite ingest <path> --mode multimodal --rerank-strategy disabled\n`, EXIT_CODES.INVALID_ARGUMENTS);
94
+ }
95
+ }
96
+ // Log the final configuration
97
+ console.log('✅ Mode configuration validated successfully');
98
+ if (mode !== 'text') {
99
+ console.log(` Mode: ${mode}`);
100
+ }
101
+ if (model) {
102
+ console.log(` Model: ${model}`);
103
+ }
104
+ if (rerankingStrategy) {
105
+ console.log(` Reranking: ${rerankingStrategy}`);
106
+ }
107
+ }
5
108
  /**
6
109
  * Run document ingestion from CLI
7
110
  * @param path - File or directory path to ingest
@@ -9,6 +112,7 @@ import { EXIT_CODES, ConfigurationError } from '../config.js';
9
112
  */
10
113
  export async function runIngest(path, options = {}) {
11
114
  try {
115
+ // Handle --rebuild-if-needed flag immediately to prevent dimension mismatch error
12
116
  // Validate path exists
13
117
  const resolvedPath = resolve(path);
14
118
  if (!existsSync(resolvedPath)) {
@@ -58,50 +162,59 @@ export async function runIngest(path, options = {}) {
58
162
  console.log(`Starting ingestion of ${pathType}: ${resolvedPath}`);
59
163
  console.log('This may take a while for large document collections...');
60
164
  console.log('');
61
- // Create and run ingestion pipeline
62
- const configOverrides = {};
165
+ // Prepare factory options
166
+ const factoryOptions = {};
63
167
  if (options.model) {
64
- configOverrides.embedding_model = options.model;
168
+ factoryOptions.embeddingModel = options.model;
65
169
  console.log(`Using embedding model: ${options.model}`);
66
170
  }
67
- const pipeline = new IngestionPipeline();
68
- pipeline.setConfigOverrides(configOverrides);
69
- // Set path storage strategy if specified
70
- if (options['path-strategy'] || options['path-base']) {
71
- const strategy = options['path-strategy'] || 'relative';
72
- const basePath = options['path-base'] || process.cwd();
73
- pipeline.setPathStorageStrategy(strategy, basePath);
74
- console.log(`Using path storage strategy: ${strategy} (base: ${basePath})`);
171
+ if (options.mode) {
172
+ factoryOptions.mode = options.mode;
173
+ console.log(`Using processing mode: ${options.mode}`);
75
174
  }
76
- try {
77
- // Handle automatic rebuild if needed
78
- if (options.rebuildIfNeeded) {
175
+ if (options['rerank-strategy']) {
176
+ factoryOptions.rerankingStrategy = options['rerank-strategy'];
177
+ console.log(`Using reranking strategy: ${options['rerank-strategy']}`);
178
+ }
179
+ if (options.rebuildIfNeeded) {
180
+ factoryOptions.forceRebuild = true;
181
+ console.log('Force rebuild enabled due to rebuildIfNeeded option');
182
+ // Delete old index file immediately to prevent dimension mismatch errors
183
+ const indexPath = process.env.RAG_INDEX_FILE || './vector-index.bin';
184
+ const { existsSync, unlinkSync } = await import('fs');
185
+ if (existsSync(indexPath)) {
79
186
  try {
80
- await pipeline.initialize();
187
+ unlinkSync(indexPath);
188
+ console.log('🗑️ Removed old index file to prevent dimension mismatch');
81
189
  }
82
190
  catch (error) {
83
- if (error instanceof Error && error.message.includes('Model mismatch detected')) {
84
- console.log('⚠️ Model mismatch detected. Rebuilding index automatically...');
85
- console.log('⚠️ WARNING: This will regenerate ALL embeddings and may take a while.');
86
- console.log('');
87
- // Create a new pipeline with the same config overrides for rebuild
88
- const rebuildPipeline = new IngestionPipeline();
89
- rebuildPipeline.setConfigOverrides(configOverrides);
90
- try {
91
- await rebuildPipeline.initialize();
92
- await rebuildPipeline.rebuildIndex();
93
- }
94
- finally {
95
- await rebuildPipeline.cleanup();
96
- }
97
- console.log('✅ Rebuild completed. Continuing with ingestion...');
98
- console.log('');
99
- }
100
- else {
101
- throw error;
102
- }
191
+ console.warn(`⚠️ Could not remove old index file: ${error}`);
103
192
  }
104
193
  }
194
+ }
195
+ // Validate mode-specific model and strategy combinations
196
+ await validateModeConfiguration(factoryOptions);
197
+ const dbPath = process.env.RAG_DB_FILE || './db.sqlite';
198
+ const indexPath = process.env.RAG_INDEX_FILE || './vector-index.bin';
199
+ // Setup graceful cleanup
200
+ setupCLICleanup(dbPath);
201
+ // Check if database is busy before starting
202
+ const busyStatus = await isDatabaseBusy(dbPath);
203
+ if (busyStatus.isBusy) {
204
+ console.log('⚠️ Database appears to be in use by another process');
205
+ console.log(` Reason: ${busyStatus.reason}`);
206
+ console.log(' Attempting to proceed anyway...');
207
+ console.log('');
208
+ }
209
+ // Create ingestion pipeline using factory
210
+ let pipeline;
211
+ try {
212
+ // Create ingestion pipeline using TextIngestionFactory with database protection
213
+ pipeline = await withCLIDatabaseAccess(dbPath, () => TextIngestionFactory.create(dbPath, indexPath, factoryOptions), {
214
+ commandName: 'Ingestion command',
215
+ showProgress: true,
216
+ maxWaitMs: 15000 // Longer timeout for ingestion
217
+ });
105
218
  const result = await pipeline.ingestPath(resolvedPath);
106
219
  // Display final results
107
220
  console.log('\n' + '='.repeat(50));
@@ -122,10 +235,26 @@ export async function runIngest(path, options = {}) {
122
235
  console.log(`Processing rate: ${chunksPerSecond} chunks/second`);
123
236
  }
124
237
  console.log('\nIngestion completed successfully!');
238
+ // Display mode-specific information
239
+ const mode = options.mode || 'text';
240
+ if (mode === 'multimodal') {
241
+ console.log('✨ Multimodal mode enabled - you can now search across text and image content');
242
+ }
125
243
  console.log('You can now search your documents using: raglite search "your query"');
244
+ console.log('');
245
+ console.log('💡 The search command will automatically detect and use the ingestion mode.');
126
246
  }
127
247
  finally {
128
- await pipeline.cleanup();
248
+ if (pipeline) {
249
+ await pipeline.cleanup();
250
+ }
251
+ // Ensure clean exit for CLI commands
252
+ const { DatabaseConnectionManager } = await import('../core/database-connection-manager.js');
253
+ await DatabaseConnectionManager.closeAllConnections();
254
+ // Force exit for CLI commands to prevent hanging
255
+ setTimeout(() => {
256
+ process.exit(0);
257
+ }, 100);
129
258
  }
130
259
  }
131
260
  catch (error) {
@@ -205,14 +334,101 @@ export async function runRebuild() {
205
334
  console.log('');
206
335
  console.log('Progress will be shown below...');
207
336
  console.log('');
208
- await rebuildIndex();
209
- console.log('\n' + '='.repeat(50));
210
- console.log('REBUILD COMPLETE');
211
- console.log('='.repeat(50));
212
- console.log('The vector index has been successfully rebuilt.');
213
- console.log('All embeddings have been regenerated with the current model.');
214
- console.log('');
215
- console.log('You can now search your documents using: raglite search "your query"');
337
+ // Detect mode from existing database for rebuild
338
+ const dbPath = process.env.RAG_DB_FILE || './db.sqlite';
339
+ const indexPath = process.env.RAG_INDEX_FILE || './vector-index.bin';
340
+ let rebuildOptions = { forceRebuild: true };
341
+ if (existsSync(dbPath)) {
342
+ try {
343
+ // Import mode detection service
344
+ const { ModeDetectionService } = await import('../core/mode-detection-service.js');
345
+ const modeService = new ModeDetectionService(dbPath);
346
+ const systemInfo = await modeService.detectMode();
347
+ console.log(`🎯 Detected existing configuration:`);
348
+ console.log(` Mode: ${systemInfo.mode}`);
349
+ console.log(` Model: ${systemInfo.modelName}`);
350
+ console.log(` Reranking: ${systemInfo.rerankingStrategy}`);
351
+ console.log('');
352
+ // Use the detected configuration for rebuild
353
+ rebuildOptions.mode = systemInfo.mode;
354
+ rebuildOptions.embeddingModel = systemInfo.modelName;
355
+ rebuildOptions.rerankingStrategy = systemInfo.rerankingStrategy;
356
+ }
357
+ catch (error) {
358
+ console.warn('⚠️ Could not detect existing mode configuration, using defaults');
359
+ console.warn(` Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
360
+ }
361
+ }
362
+ // Create ingestion pipeline with force rebuild using factory
363
+ const pipeline = await TextIngestionFactory.create(dbPath, indexPath, rebuildOptions);
364
+ try {
365
+ // Get all documents from database and re-ingest them
366
+ const { openDatabase } = await import('../core/db.js');
367
+ const db = await openDatabase(dbPath);
368
+ try {
369
+ const documents = await db.all('SELECT DISTINCT source FROM documents ORDER BY source');
370
+ if (documents.length === 0) {
371
+ throw new Error('No documents found in database. Nothing to rebuild.');
372
+ }
373
+ console.log(`Found ${documents.length} document${documents.length === 1 ? '' : 's'} to rebuild`);
374
+ let totalResult = {
375
+ documentsProcessed: 0,
376
+ chunksCreated: 0,
377
+ embeddingsGenerated: 0,
378
+ documentErrors: 0,
379
+ embeddingErrors: 0,
380
+ processingTimeMs: 0
381
+ };
382
+ // Re-ingest each document
383
+ for (const doc of documents) {
384
+ if (existsSync(doc.source)) {
385
+ console.log(`Rebuilding: ${doc.source}`);
386
+ const result = await pipeline.ingestFile(doc.source);
387
+ totalResult.documentsProcessed += result.documentsProcessed;
388
+ totalResult.chunksCreated += result.chunksCreated;
389
+ totalResult.embeddingsGenerated += result.embeddingsGenerated;
390
+ totalResult.documentErrors += result.documentErrors;
391
+ totalResult.embeddingErrors += result.embeddingErrors;
392
+ totalResult.processingTimeMs += result.processingTimeMs;
393
+ }
394
+ else {
395
+ console.warn(`⚠️ Document not found, skipping: ${doc.source}`);
396
+ totalResult.documentErrors++;
397
+ }
398
+ }
399
+ console.log('\n' + '='.repeat(50));
400
+ console.log('REBUILD COMPLETE');
401
+ console.log('='.repeat(50));
402
+ console.log(`Documents processed: ${totalResult.documentsProcessed}`);
403
+ console.log(`Chunks created: ${totalResult.chunksCreated}`);
404
+ console.log(`Embeddings generated: ${totalResult.embeddingsGenerated}`);
405
+ if (totalResult.documentErrors > 0) {
406
+ console.log(`Document errors: ${totalResult.documentErrors}`);
407
+ }
408
+ if (totalResult.embeddingErrors > 0) {
409
+ console.log(`Embedding errors: ${totalResult.embeddingErrors}`);
410
+ }
411
+ console.log(`Total processing time: ${(totalResult.processingTimeMs / 1000).toFixed(2)} seconds`);
412
+ console.log('');
413
+ console.log('The vector index has been successfully rebuilt.');
414
+ console.log('All embeddings have been regenerated with the current model.');
415
+ console.log('');
416
+ console.log('You can now search your documents using: raglite search "your query"');
417
+ }
418
+ finally {
419
+ await db.close();
420
+ }
421
+ }
422
+ finally {
423
+ await pipeline.cleanup();
424
+ // Ensure clean exit for CLI commands
425
+ const { DatabaseConnectionManager } = await import('../core/database-connection-manager.js');
426
+ await DatabaseConnectionManager.closeAllConnections();
427
+ // Force exit for CLI commands to prevent hanging
428
+ setTimeout(() => {
429
+ process.exit(0);
430
+ }, 100);
431
+ }
216
432
  }
217
433
  catch (error) {
218
434
  console.error('\n' + '='.repeat(50));
@@ -1,6 +1,7 @@
1
1
  import { existsSync } from 'fs';
2
- import { openDatabase } from '../db.js';
3
- import { config, EXIT_CODES, ConfigurationError } from '../config.js';
2
+ import { PolymorphicSearchFactory } from '../core/polymorphic-search-factory.js';
3
+ import { withCLIDatabaseAccess, setupCLICleanup } from '../core/cli-database-utils.js';
4
+ import { config, EXIT_CODES, ConfigurationError } from '../core/config.js';
4
5
  /**
5
6
  * Run search from CLI
6
7
  * @param query - Search query string
@@ -53,33 +54,17 @@ export async function runSearch(query, options = {}) {
53
54
  process.exit(EXIT_CODES.INDEX_ERROR);
54
55
  }
55
56
  console.log(`Searching for: "${query}"`);
56
- console.log('Loading search engine...');
57
57
  console.log('');
58
- // Initialize components
59
- let db, embedder, indexManager, searchEngine;
58
+ // Setup graceful cleanup
59
+ setupCLICleanup(effectiveConfig.db_file);
60
+ // Initialize search engine using polymorphic factory with database protection
61
+ let searchEngine;
60
62
  try {
61
- // Open database connection
62
- db = await openDatabase(effectiveConfig.db_file);
63
- // Read the model that was used during ingestion from the database
64
- const { getStoredModelInfo } = await import('../db.js');
65
- const storedModel = await getStoredModelInfo(db);
66
- const modelToUse = storedModel ? storedModel.modelName : effectiveConfig.embedding_model;
67
- console.log(`Using model from ingestion: ${modelToUse}`);
68
- // Initialize embedding engine with the correct model
69
- const { EmbeddingEngine } = await import('../embedder.js');
70
- embedder = new EmbeddingEngine(modelToUse);
71
- await embedder.loadModel();
72
- // Initialize index manager (which handles the vector index and hash mapping)
73
- const { IndexManager } = await import('../index-manager.js');
74
- const { getModelDefaults } = await import('../config.js');
75
- const modelDefaults = getModelDefaults(modelToUse);
76
- indexManager = new IndexManager(effectiveConfig.index_file, effectiveConfig.db_file, modelDefaults.dimensions, modelToUse);
77
- await indexManager.initialize();
78
- // Create search engine with index manager
79
- const enableReranking = options.rerank !== undefined ? options.rerank : effectiveConfig.rerank_enabled;
80
- const { SearchEngine } = await import('../search.js');
81
- searchEngine = SearchEngine.createWithComponents(embedder, indexManager, db, enableReranking);
82
- await searchEngine.initialize();
63
+ // Create search engine using PolymorphicSearchFactory (auto-detects mode)
64
+ searchEngine = await withCLIDatabaseAccess(effectiveConfig.db_file, () => PolymorphicSearchFactory.create(effectiveConfig.index_file, effectiveConfig.db_file), {
65
+ commandName: 'Search command',
66
+ showProgress: true
67
+ });
83
68
  // Prepare search options
84
69
  const searchOptions = {};
85
70
  if (options['top-k'] !== undefined) {
@@ -90,8 +75,18 @@ export async function runSearch(query, options = {}) {
90
75
  }
91
76
  // Perform search
92
77
  const startTime = Date.now();
93
- const results = await searchEngine.search(query, searchOptions);
78
+ let results = await searchEngine.search(query, searchOptions);
94
79
  const searchTime = Date.now() - startTime;
80
+ // Apply content type filter if specified
81
+ const contentTypeFilter = options['content-type'];
82
+ if (contentTypeFilter && contentTypeFilter !== 'all') {
83
+ const originalCount = results.length;
84
+ results = results.filter(r => r.contentType === contentTypeFilter);
85
+ if (results.length < originalCount) {
86
+ console.log(`Filtered to ${results.length} ${contentTypeFilter} result${results.length === 1 ? '' : 's'} (from ${originalCount} total)`);
87
+ console.log('');
88
+ }
89
+ }
95
90
  // Display results
96
91
  if (results.length === 0) {
97
92
  console.log('No results found.');
@@ -103,10 +98,30 @@ export async function runSearch(query, options = {}) {
103
98
  else {
104
99
  console.log(`Found ${results.length} result${results.length === 1 ? '' : 's'} in ${searchTime}ms:\n`);
105
100
  results.forEach((result, index) => {
106
- console.log(`${index + 1}. ${result.document.title}`);
101
+ // Add content type icon for visual distinction
102
+ const contentTypeIcon = result.contentType === 'image' ? '🖼️ ' : '📄 ';
103
+ const contentTypeLabel = result.contentType === 'image' ? '[IMAGE]' : '[TEXT]';
104
+ console.log(`${index + 1}. ${contentTypeIcon}${result.document.title}`);
107
105
  console.log(` Source: ${result.document.source}`);
106
+ console.log(` Type: ${contentTypeLabel}`);
108
107
  console.log(` Score: ${(result.score * 100).toFixed(1)}%`);
109
- console.log(` Text: ${truncateText(result.text, 200)}`);
108
+ // Display content differently based on type
109
+ if (result.contentType === 'image') {
110
+ // For images, show metadata if available
111
+ if (result.metadata?.description) {
112
+ console.log(` Description: ${truncateText(result.metadata.description, 200)}`);
113
+ }
114
+ if (result.metadata?.dimensions) {
115
+ console.log(` Dimensions: ${result.metadata.dimensions}`);
116
+ }
117
+ if (result.metadata?.format) {
118
+ console.log(` Format: ${result.metadata.format}`);
119
+ }
120
+ }
121
+ else {
122
+ // For text, show content preview
123
+ console.log(` Text: ${truncateText(result.content, 200)}`);
124
+ }
110
125
  console.log('');
111
126
  });
112
127
  // Show search statistics
@@ -121,9 +136,16 @@ export async function runSearch(query, options = {}) {
121
136
  }
122
137
  finally {
123
138
  // Cleanup resources
124
- if (db) {
125
- await db.close();
139
+ if (searchEngine) {
140
+ await searchEngine.cleanup();
126
141
  }
142
+ // Ensure clean exit for CLI commands
143
+ const { DatabaseConnectionManager } = await import('../core/database-connection-manager.js');
144
+ await DatabaseConnectionManager.closeAllConnections();
145
+ // Force exit for CLI commands to prevent hanging
146
+ setTimeout(() => {
147
+ process.exit(0);
148
+ }, 100);
127
149
  }
128
150
  }
129
151
  catch (error) {