rag-lite-ts 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +240 -0
  3. package/dist/api-errors.d.ts +90 -0
  4. package/dist/api-errors.d.ts.map +1 -0
  5. package/dist/api-errors.js +320 -0
  6. package/dist/api-errors.js.map +1 -0
  7. package/dist/chunker.d.ts +47 -0
  8. package/dist/chunker.d.ts.map +1 -0
  9. package/dist/chunker.js +256 -0
  10. package/dist/chunker.js.map +1 -0
  11. package/dist/cli/indexer.d.ts +11 -0
  12. package/dist/cli/indexer.d.ts.map +1 -0
  13. package/dist/cli/indexer.js +272 -0
  14. package/dist/cli/indexer.js.map +1 -0
  15. package/dist/cli/search.d.ts +7 -0
  16. package/dist/cli/search.d.ts.map +1 -0
  17. package/dist/cli/search.js +206 -0
  18. package/dist/cli/search.js.map +1 -0
  19. package/dist/cli.d.ts +3 -0
  20. package/dist/cli.d.ts.map +1 -0
  21. package/dist/cli.js +362 -0
  22. package/dist/cli.js.map +1 -0
  23. package/dist/config.d.ts +90 -0
  24. package/dist/config.d.ts.map +1 -0
  25. package/dist/config.js +281 -0
  26. package/dist/config.js.map +1 -0
  27. package/dist/db.d.ts +90 -0
  28. package/dist/db.d.ts.map +1 -0
  29. package/dist/db.js +340 -0
  30. package/dist/db.js.map +1 -0
  31. package/dist/embedder.d.ts +101 -0
  32. package/dist/embedder.d.ts.map +1 -0
  33. package/dist/embedder.js +323 -0
  34. package/dist/embedder.js.map +1 -0
  35. package/dist/error-handler.d.ts +91 -0
  36. package/dist/error-handler.d.ts.map +1 -0
  37. package/dist/error-handler.js +196 -0
  38. package/dist/error-handler.js.map +1 -0
  39. package/dist/file-processor.d.ts +59 -0
  40. package/dist/file-processor.d.ts.map +1 -0
  41. package/dist/file-processor.js +312 -0
  42. package/dist/file-processor.js.map +1 -0
  43. package/dist/index-manager.d.ts +99 -0
  44. package/dist/index-manager.d.ts.map +1 -0
  45. package/dist/index-manager.js +444 -0
  46. package/dist/index-manager.js.map +1 -0
  47. package/dist/index.d.ts +13 -0
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +21 -0
  50. package/dist/index.js.map +1 -0
  51. package/dist/indexer.d.ts +7 -0
  52. package/dist/indexer.d.ts.map +1 -0
  53. package/dist/indexer.js +51 -0
  54. package/dist/indexer.js.map +1 -0
  55. package/dist/ingestion.d.ts +175 -0
  56. package/dist/ingestion.d.ts.map +1 -0
  57. package/dist/ingestion.js +705 -0
  58. package/dist/ingestion.js.map +1 -0
  59. package/dist/mcp-server.d.ts +14 -0
  60. package/dist/mcp-server.d.ts.map +1 -0
  61. package/dist/mcp-server.js +680 -0
  62. package/dist/mcp-server.js.map +1 -0
  63. package/dist/path-manager.d.ts +42 -0
  64. package/dist/path-manager.d.ts.map +1 -0
  65. package/dist/path-manager.js +66 -0
  66. package/dist/path-manager.js.map +1 -0
  67. package/dist/preprocess.d.ts +19 -0
  68. package/dist/preprocess.d.ts.map +1 -0
  69. package/dist/preprocess.js +203 -0
  70. package/dist/preprocess.js.map +1 -0
  71. package/dist/preprocessors/index.d.ts +17 -0
  72. package/dist/preprocessors/index.d.ts.map +1 -0
  73. package/dist/preprocessors/index.js +38 -0
  74. package/dist/preprocessors/index.js.map +1 -0
  75. package/dist/preprocessors/mdx.d.ts +25 -0
  76. package/dist/preprocessors/mdx.d.ts.map +1 -0
  77. package/dist/preprocessors/mdx.js +101 -0
  78. package/dist/preprocessors/mdx.js.map +1 -0
  79. package/dist/preprocessors/mermaid.d.ts +68 -0
  80. package/dist/preprocessors/mermaid.d.ts.map +1 -0
  81. package/dist/preprocessors/mermaid.js +329 -0
  82. package/dist/preprocessors/mermaid.js.map +1 -0
  83. package/dist/preprocessors/registry.d.ts +56 -0
  84. package/dist/preprocessors/registry.d.ts.map +1 -0
  85. package/dist/preprocessors/registry.js +179 -0
  86. package/dist/preprocessors/registry.js.map +1 -0
  87. package/dist/reranker.d.ts +40 -0
  88. package/dist/reranker.d.ts.map +1 -0
  89. package/dist/reranker.js +212 -0
  90. package/dist/reranker.js.map +1 -0
  91. package/dist/resource-manager-demo.d.ts +7 -0
  92. package/dist/resource-manager-demo.d.ts.map +1 -0
  93. package/dist/resource-manager-demo.js +52 -0
  94. package/dist/resource-manager-demo.js.map +1 -0
  95. package/dist/resource-manager.d.ts +129 -0
  96. package/dist/resource-manager.d.ts.map +1 -0
  97. package/dist/resource-manager.js +389 -0
  98. package/dist/resource-manager.js.map +1 -0
  99. package/dist/search-standalone.d.ts +7 -0
  100. package/dist/search-standalone.d.ts.map +1 -0
  101. package/dist/search-standalone.js +117 -0
  102. package/dist/search-standalone.js.map +1 -0
  103. package/dist/search.d.ts +92 -0
  104. package/dist/search.d.ts.map +1 -0
  105. package/dist/search.js +454 -0
  106. package/dist/search.js.map +1 -0
  107. package/dist/test-utils.d.ts +36 -0
  108. package/dist/test-utils.d.ts.map +1 -0
  109. package/dist/test-utils.js +27 -0
  110. package/dist/test-utils.js.map +1 -0
  111. package/dist/tokenizer.d.ts +21 -0
  112. package/dist/tokenizer.d.ts.map +1 -0
  113. package/dist/tokenizer.js +59 -0
  114. package/dist/tokenizer.js.map +1 -0
  115. package/dist/types.d.ts +44 -0
  116. package/dist/types.d.ts.map +1 -0
  117. package/dist/types.js +3 -0
  118. package/dist/types.js.map +1 -0
  119. package/dist/vector-index.d.ts +64 -0
  120. package/dist/vector-index.d.ts.map +1 -0
  121. package/dist/vector-index.js +308 -0
  122. package/dist/vector-index.js.map +1 -0
  123. package/package.json +80 -0
@@ -0,0 +1,680 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * MCP server entry point for rag-lite-ts
4
+ *
5
+ * This is a thin wrapper around existing search and indexing functions
6
+ * that exposes them as MCP tools without creating REST/GraphQL endpoints.
7
+ *
8
+ * The MCP server lives in the same package as CLI with dual entry points
9
+ * and provides proper MCP tool definitions for search and indexing capabilities.
10
+ *
11
+ * Requirements addressed: 6.2, 6.4, 6.5, 6.6
12
+ */
13
+ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
14
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
15
+ import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
16
+ import { existsSync, statSync } from 'fs';
17
+ import { resolve } from 'path';
18
+ import { SearchEngine } from './search.js';
19
+ import { IngestionPipeline, rebuildIndex } from './ingestion.js';
20
+ import { initializeEmbeddingEngine } from './embedder.js';
21
+ import { openDatabase } from './db.js';
22
+ import { config, validateConfig, ConfigurationError } from './config.js';
/**
 * Embedding models accepted by the `ingest` tool. Shared by the tool's input
 * schema and its runtime validation so the two cannot drift apart.
 */
const SUPPORTED_EMBEDDING_MODELS = ['sentence-transformers/all-MiniLM-L6-v2', 'Xenova/all-mpnet-base-v2'];

/**
 * Wrap a JSON-serialisable payload in the MCP tool-result envelope used by every handler.
 *
 * @param {unknown} payload - Value to pretty-print as the single text content item.
 * @param {boolean} [isError=false] - When true, the result is flagged with `isError: true`.
 * @returns {{content: Array<{type: string, text: string}>, isError?: boolean}}
 */
function jsonTextResponse(payload, isError = false) {
    const response = {
        content: [
            {
                type: 'text',
                text: JSON.stringify(payload, null, 2),
            },
        ],
    };
    if (isError) {
        response.isError = true;
    }
    return response;
}

/**
 * Build the structured "manual intervention required" error result.
 *
 * @param {{error: string, message: string, details: string, explanation: string, options: string[]}} info
 * @returns {{content: Array<{type: string, text: string}>, isError: boolean}}
 */
function manualInterventionResponse({ error, message, details, explanation, options }) {
    return jsonTextResponse({
        error,
        message,
        details,
        resolution: {
            action: 'manual_intervention_required',
            explanation,
            options,
            warning: 'Rebuilding will regenerate all embeddings and may take significant time'
        }
    }, true);
}

/**
 * Translate a model/dimension mismatch error into a structured tool result.
 *
 * @param {unknown} error - The caught value.
 * @param {string} operation - Word used in the message ("search" or "ingestion").
 * @param {string[]} [extraOptions=[]] - Additional resolution hints, inserted before the last option.
 * @returns {object|null} The tool result, or null when `error` is not a recognised mismatch.
 */
function buildMismatchResponse(error, operation, extraOptions = []) {
    if (!(error instanceof Error)) {
        return null;
    }
    if (error.message.includes('Model mismatch detected')) {
        return manualInterventionResponse({
            error: 'MODEL_MISMATCH',
            message: `Cannot perform ${operation} due to model mismatch`,
            details: error.message,
            explanation: 'The embedding model configuration does not match the indexed data. Please verify your setup before proceeding.',
            options: [
                'Check if the model mismatch is intentional',
                'If you want to use a different model, manually run the rebuild_index tool',
                ...extraOptions,
                'Verify your model configuration matches your indexing setup'
            ]
        });
    }
    if (error.message.includes('dimension mismatch')) {
        return manualInterventionResponse({
            error: 'DIMENSION_MISMATCH',
            message: `Cannot perform ${operation} due to vector dimension mismatch`,
            details: error.message,
            explanation: 'The vector dimensions do not match between the current model and the indexed data. Please verify your setup before proceeding.',
            options: [
                'Check your model configuration',
                'If you want to change models, manually run the rebuild_index tool',
                ...extraOptions,
                'Ensure consistency between indexing and search models'
            ]
        });
    }
    return null;
}

/**
 * MCP Server class that wraps RAG-lite TS functionality.
 * Implements the MCP protocol interface without creating REST/GraphQL endpoints.
 *
 * Exposes four tools: `search`, `ingest`, `rebuild_index` and `get_stats`.
 */
class RagLiteMCPServer {
    /** Underlying MCP SDK server instance. */
    server;
    /** Lazily created search engine; null until the first successful search initialisation. */
    searchEngine = null;
    /** True once `initializeSearchEngine` has completed successfully. */
    isSearchEngineInitialized = false;

    constructor() {
        this.server = new Server({
            name: 'rag-lite-ts',
            version: '1.0.0',
        }, {
            capabilities: {
                tools: {},
            },
        });
        this.setupToolHandlers();
    }

    /**
     * Register the MCP request handlers: one that lists the tool definitions
     * and one that dispatches tool calls to the matching `handle*` method.
     */
    setupToolHandlers() {
        // List available tools
        this.server.setRequestHandler(ListToolsRequestSchema, async () => {
            return {
                tools: [
                    {
                        name: 'search',
                        description: 'Search indexed documents using semantic similarity. Returns relevant document chunks with scores and metadata.',
                        inputSchema: {
                            type: 'object',
                            properties: {
                                query: {
                                    type: 'string',
                                    description: 'Search query string to find relevant documents',
                                    minLength: 1,
                                    maxLength: 500
                                },
                                top_k: {
                                    type: 'number',
                                    description: 'Number of results to return (default: 10, max: 100)',
                                    minimum: 1,
                                    maximum: 100,
                                    default: 10
                                },
                                rerank: {
                                    type: 'boolean',
                                    description: 'Enable reranking for better result quality (default: false)',
                                    default: false
                                }
                            },
                            required: ['query'],
                            additionalProperties: false
                        }
                    },
                    {
                        name: 'ingest',
                        description: 'Ingest documents from a file or directory path. Processes .md and .txt files, chunks them, generates embeddings, and stores in the search index.',
                        inputSchema: {
                            type: 'object',
                            properties: {
                                path: {
                                    type: 'string',
                                    description: 'File or directory path to ingest. Can be a single .md/.txt file or directory containing such files.'
                                },
                                model: {
                                    type: 'string',
                                    description: 'Embedding model to use (default: sentence-transformers/all-MiniLM-L6-v2). Options: sentence-transformers/all-MiniLM-L6-v2, Xenova/all-mpnet-base-v2',
                                    enum: [...SUPPORTED_EMBEDDING_MODELS]
                                },
                                force_rebuild: {
                                    type: 'boolean',
                                    description: 'Force rebuild of the entire index (default: false)',
                                    default: false
                                }
                            },
                            required: ['path'],
                            additionalProperties: false
                        }
                    },
                    {
                        name: 'rebuild_index',
                        description: 'Rebuild the entire vector index from scratch. Useful when model version changes or for maintenance. This will regenerate all embeddings.',
                        inputSchema: {
                            type: 'object',
                            properties: {},
                            additionalProperties: false
                        }
                    },
                    {
                        name: 'get_stats',
                        description: 'Get statistics about the current search index including number of documents, chunks, and index status.',
                        inputSchema: {
                            type: 'object',
                            properties: {},
                            additionalProperties: false
                        }
                    }
                ],
            };
        });
        // Handle tool calls
        this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
            const { name, arguments: rawArgs } = request.params;
            // `arguments` may be absent from a tool call; normalise it so the
            // handlers report a validation error instead of a raw TypeError.
            const args = rawArgs ?? {};
            try {
                switch (name) {
                    case 'search':
                        return await this.handleSearch(args);
                    case 'ingest':
                        return await this.handleIngest(args);
                    case 'rebuild_index':
                        return await this.handleRebuildIndex(args);
                    case 'get_stats':
                        return await this.handleGetStats(args);
                    default:
                        throw new Error(`Unknown tool: ${name}`);
                }
            }
            catch (error) {
                const errorMessage = error instanceof Error ? error.message : String(error);
                return {
                    content: [
                        {
                            type: 'text',
                            text: `Error: ${errorMessage}`,
                        },
                    ],
                    // Flag the result as a failure so clients can tell it apart from a successful call.
                    isError: true,
                };
            }
        });
    }

    /**
     * Handle `search` tool calls.
     *
     * Validates the arguments, checks that the database and index files exist,
     * lazily initialises the search engine and returns the ranked results as
     * pretty-printed JSON.
     *
     * @param {{query?: string, top_k?: number, rerank?: boolean}} [args] - Tool arguments.
     * @returns {Promise<object>} MCP tool result. Model/dimension mismatch errors are
     *   returned as a structured error result rather than thrown.
     * @throws {Error} On invalid arguments, missing database/index, or any other failure.
     */
    async handleSearch(args = {}) {
        try {
            // Validate arguments
            if (!args.query || typeof args.query !== 'string') {
                throw new Error('Query parameter is required and must be a string');
            }
            if (args.query.trim().length === 0) {
                throw new Error('Query cannot be empty');
            }
            if (args.query.length > 500) {
                throw new Error('Query is too long (maximum 500 characters)');
            }
            // Validate optional parameters
            if (args.top_k !== undefined) {
                if (typeof args.top_k !== 'number' || args.top_k < 1 || args.top_k > 100) {
                    throw new Error('top_k must be a number between 1 and 100');
                }
            }
            if (args.rerank !== undefined && typeof args.rerank !== 'boolean') {
                throw new Error('rerank must be a boolean');
            }
            // Check if database and index exist
            if (!existsSync(config.db_file)) {
                throw new Error('No database found. You need to ingest documents first using the ingest tool.');
            }
            if (!existsSync(config.index_file)) {
                throw new Error('No vector index found. The ingestion may not have completed successfully. Try using the ingest tool or rebuild_index tool.');
            }
            // Initialize search engine if needed
            if (!this.isSearchEngineInitialized) {
                await this.initializeSearchEngine();
            }
            // A validated top_k is always >= 1; otherwise fall back to the configured value, then 10.
            const searchOptions = {
                top_k: args.top_k ?? (config.top_k || 10),
                rerank: args.rerank ?? config.rerank_enabled
            };
            const startTime = Date.now();
            const results = await this.searchEngine.search(args.query, searchOptions);
            const searchTime = Date.now() - startTime;
            return jsonTextResponse({
                query: args.query,
                results_count: results.length,
                search_time_ms: searchTime,
                results: results.map((result, index) => ({
                    rank: index + 1,
                    score: Math.round(result.score * 100) / 100, // Round to 2 decimal places
                    document: {
                        id: result.document.id,
                        title: result.document.title,
                        source: result.document.source
                    },
                    text: result.text
                }))
            });
        }
        catch (error) {
            const mismatchResponse = buildMismatchResponse(error, 'search');
            if (mismatchResponse) {
                return mismatchResponse;
            }
            // Re-throw other errors to be handled by the main error handler
            throw error;
        }
    }

    /**
     * Handle `ingest` tool calls.
     *
     * Validates the path (must exist; a single file must end in .md or .txt) and
     * the optional model, runs an `IngestionPipeline` over the resolved path and
     * returns a JSON summary. On success the cached search engine is discarded
     * so the next search re-initialises against the updated index.
     *
     * @param {{path?: string, model?: string, force_rebuild?: boolean}} [args] - Tool arguments.
     * @returns {Promise<object>} MCP tool result. Model/dimension mismatch errors are
     *   returned as a structured error result rather than thrown.
     * @throws {Error} On invalid arguments or any other ingestion failure.
     */
    async handleIngest(args = {}) {
        try {
            // Validate arguments
            if (!args.path || typeof args.path !== 'string') {
                throw new Error('Path parameter is required and must be a string');
            }
            const resolvedPath = resolve(args.path);
            if (!existsSync(resolvedPath)) {
                throw new Error(`Path does not exist: ${args.path}`);
            }
            let stats;
            try {
                stats = statSync(resolvedPath);
            }
            catch (error) {
                throw new Error(`Cannot access path: ${args.path}. Check permissions.`, { cause: error });
            }
            // Validate file type for single files (directories are handed to the pipeline as-is)
            if (stats.isFile()) {
                const lowerCasedPath = args.path.toLowerCase();
                const hasValidExtension = ['.md', '.txt'].some((ext) => lowerCasedPath.endsWith(ext));
                if (!hasValidExtension) {
                    throw new Error(`Unsupported file type: ${args.path}. Supported types: .md, .txt`);
                }
            }
            // Validate model parameter if provided
            if (args.model && !SUPPORTED_EMBEDDING_MODELS.includes(args.model)) {
                throw new Error(`Unsupported model: ${args.model}. Supported models: ${SUPPORTED_EMBEDDING_MODELS.join(', ')}`);
            }
            // Create config overrides if model is specified
            const configOverrides = args.model ? { embedding_model: args.model } : {};
            const pipeline = new IngestionPipeline();
            pipeline.setConfigOverrides(configOverrides);
            try {
                const result = await pipeline.ingestPath(resolvedPath, {
                    forceRebuild: args.force_rebuild || false
                });
                // Reset search engine initialization flag since index may have changed
                this.isSearchEngineInitialized = false;
                this.searchEngine = null;
                const elapsedSeconds = result.processingTimeMs / 1000;
                return jsonTextResponse({
                    path: resolvedPath,
                    path_type: stats.isDirectory() ? 'directory' : 'file',
                    documents_processed: result.documentsProcessed,
                    chunks_created: result.chunksCreated,
                    embeddings_generated: result.embeddingsGenerated,
                    document_errors: result.documentErrors,
                    embedding_errors: result.embeddingErrors,
                    processing_time_ms: result.processingTimeMs,
                    processing_time_seconds: Math.round(elapsedSeconds * 100) / 100,
                    // Guard against division by zero for instantaneous runs.
                    chunks_per_second: result.processingTimeMs > 0 ?
                        Math.round(result.chunksCreated / elapsedSeconds * 100) / 100 : 0,
                    success: true
                });
            }
            finally {
                await pipeline.cleanup();
            }
        }
        catch (error) {
            const mismatchResponse = buildMismatchResponse(error, 'ingestion', [
                'Use force_rebuild: true parameter if you want to rebuild during ingestion'
            ]);
            if (mismatchResponse) {
                return mismatchResponse;
            }
            // Initialization failures that mention a model mismatch. Messages containing
            // 'dimension mismatch' or 'Model mismatch detected' were already handled above,
            // so only the remaining 'Model mismatch' wording can reach this branch.
            if (error instanceof Error
                && error.message.includes('Failed to initialize')
                && error.message.includes('Model mismatch')) {
                return manualInterventionResponse({
                    error: 'INITIALIZATION_FAILED',
                    message: 'Cannot initialize ingestion due to model compatibility issues',
                    details: error.message,
                    explanation: 'The system cannot initialize due to model compatibility issues. Please verify your setup before proceeding.',
                    options: [
                        'Check your model configuration',
                        'If you want to change models, manually run the rebuild_index tool',
                        'Verify consistency between your indexing and search setup'
                    ]
                });
            }
            // Re-throw other errors to be handled by the main error handler
            throw error;
        }
    }

    /**
     * Handle `rebuild_index` tool calls by delegating to `rebuildIndex()` and
     * discarding the cached search engine afterwards.
     *
     * @param {object} [_args] - Unused; the tool takes no arguments.
     * @returns {Promise<object>} MCP tool result with a success summary.
     * @throws {Error} Wrapped as "Index rebuild failed: …" when the rebuild fails.
     */
    async handleRebuildIndex(_args) {
        try {
            await rebuildIndex();
            // Reset search engine initialization flag since index was rebuilt
            this.isSearchEngineInitialized = false;
            this.searchEngine = null;
            return jsonTextResponse({
                operation: 'rebuild_index',
                success: true,
                message: 'Vector index has been successfully rebuilt. All embeddings have been regenerated with the current model.'
            });
        }
        catch (error) {
            throw new Error(`Index rebuild failed: ${error instanceof Error ? error.message : 'Unknown error'}`, { cause: error });
        }
    }

    /**
     * Handle `get_stats` tool calls.
     *
     * Reports whether the database and index files exist, the configured model
     * and its defaults, compatibility with the model stored in the database,
     * document/chunk counts, and (when the search engine is already initialised)
     * the engine's own statistics.
     *
     * @param {object} [_args] - Unused; the tool takes no arguments.
     * @returns {Promise<object>} MCP tool result containing the stats as JSON.
     * @throws {Error} Wrapped as "Failed to get stats: …" on unexpected failures.
     *   Database errors are reported inside the stats payload instead.
     */
    async handleGetStats(_args) {
        try {
            const stats = {
                database_exists: existsSync(config.db_file),
                index_exists: existsSync(config.index_file),
                search_engine_initialized: this.isSearchEngineInitialized
            };
            // Get model information and compatibility status
            const { getModelDefaults } = await import('./config.js');
            const { getStoredModelInfo } = await import('./db.js');
            const currentModel = config.embedding_model;
            const currentDefaults = getModelDefaults(currentModel);
            stats.model_info = {
                current_model: currentModel,
                current_dimensions: currentDefaults.dimensions,
                model_specific_config: {
                    chunk_size: currentDefaults.chunk_size,
                    chunk_overlap: currentDefaults.chunk_overlap,
                    batch_size: currentDefaults.batch_size
                }
            };
            if (stats.database_exists) {
                try {
                    const db = await openDatabase(config.db_file);
                    try {
                        const storedModel = await getStoredModelInfo(db);
                        if (storedModel) {
                            stats.model_info.stored_model = storedModel.modelName;
                            stats.model_info.stored_dimensions = storedModel.dimensions;
                            // Check for compatibility issues
                            const modelMatch = storedModel.modelName === currentModel;
                            const dimensionMatch = storedModel.dimensions === currentDefaults.dimensions;
                            stats.model_info.compatibility = {
                                model_matches: modelMatch,
                                dimensions_match: dimensionMatch,
                                compatible: modelMatch && dimensionMatch
                            };
                            if (!stats.model_info.compatibility.compatible) {
                                stats.model_info.compatibility.issue = 'Model mismatch detected - rebuild required';
                                // Point MCP clients at the tool they can actually invoke.
                                stats.model_info.compatibility.resolution = 'Run the rebuild_index tool to rebuild the index with the new model';
                            }
                        }
                        else {
                            stats.model_info.compatibility = {
                                status: 'No stored model info - first run or needs rebuild'
                            };
                        }
                        // Get basic database stats
                        const docCount = await db.get('SELECT COUNT(*) as count FROM documents');
                        const chunkCount = await db.get('SELECT COUNT(*) as count FROM chunks');
                        stats.total_documents = docCount?.count || 0;
                        stats.total_chunks = chunkCount?.count || 0;
                    }
                    finally {
                        await db.close();
                    }
                }
                catch (error) {
                    // Database problems are reported in the payload rather than failing the whole call.
                    const reason = error instanceof Error ? error.message : 'Unknown error';
                    stats.database_error = reason;
                    stats.model_info.compatibility = {
                        status: 'Error checking model compatibility',
                        error: reason
                    };
                }
            }
            else {
                // No database exists - indicate this is a fresh setup
                stats.model_info.compatibility = {
                    status: 'No database exists - fresh setup, no compatibility issues'
                };
            }
            // If search engine is initialized, its stats take precedence over the raw chunk count
            if (this.isSearchEngineInitialized && this.searchEngine) {
                const searchStats = await this.searchEngine.getStats();
                stats.total_chunks = searchStats.totalChunks;
                stats.index_size = searchStats.indexSize;
                stats.reranking_enabled = searchStats.rerankingEnabled;
            }
            // Show effective configuration (with model-specific defaults applied)
            stats.config = {
                db_file: config.db_file,
                index_file: config.index_file,
                embedding_model: config.embedding_model,
                chunk_size: currentDefaults.chunk_size, // Use model-specific default
                chunk_overlap: currentDefaults.chunk_overlap, // Use model-specific default
                batch_size: currentDefaults.batch_size, // Use model-specific default
                top_k: config.top_k,
                rerank_enabled: config.rerank_enabled
            };
            return jsonTextResponse(stats);
        }
        catch (error) {
            throw new Error(`Failed to get stats: ${error instanceof Error ? error.message : 'Unknown error'}`, { cause: error });
        }
    }

    /**
     * Lazily initialise the search engine.
     *
     * Opens the database, reads the stored model info and builds the embedder,
     * index manager and search engine from the *stored* model (not from
     * `config.embedding_model`). Does nothing if already initialised.
     *
     * If any step fails, the database handle opened here is closed again and
     * the partially built engine reference is dropped.
     *
     * @returns {Promise<void>}
     * @throws {Error} The original error for model/dimension mismatches; otherwise
     *   an error wrapped as "Failed to initialize search engine: …".
     */
    async initializeSearchEngine() {
        if (this.isSearchEngineInitialized) {
            return;
        }
        let db = null;
        try {
            // Validate configuration
            validateConfig(config);
            // Open database connection
            db = await openDatabase(config.db_file);
            // Read stored model info from database
            const { getStoredModelInfo } = await import('./db.js');
            const storedModelInfo = await getStoredModelInfo(db);
            if (!storedModelInfo) {
                throw new Error('No model information found in database. The database may be from an older version or corrupted. Try running ingestion again.');
            }
            // Use the stored model info instead of config.embedding_model
            const { getModelDefaults } = await import('./config.js');
            const modelDefaults = getModelDefaults(storedModelInfo.modelName);
            const embedder = await initializeEmbeddingEngine(storedModelInfo.modelName, modelDefaults.batch_size);
            // Initialize index manager with stored model info
            const { IndexManager } = await import('./index-manager.js');
            const indexManager = new IndexManager(config.index_file, config.db_file, storedModelInfo.dimensions, storedModelInfo.modelName);
            await indexManager.initialize();
            // Create search engine
            this.searchEngine = SearchEngine.createWithComponents(embedder, indexManager, db, config.rerank_enabled);
            await this.searchEngine.initialize();
            this.isSearchEngineInitialized = true;
        }
        catch (error) {
            // Do not keep a half-initialised engine around, and release the database handle.
            this.searchEngine = null;
            if (db) {
                try {
                    await db.close();
                }
                catch {
                    // Best effort: the initialization error below is the one worth reporting.
                }
            }
            // Mismatch errors are re-thrown untouched so callers can match on their message.
            if (error instanceof Error && (error.message.includes('Model mismatch detected') || error.message.includes('dimension mismatch'))) {
                throw error;
            }
            // For other initialization errors, provide a generic wrapper
            throw new Error(`Failed to initialize search engine: ${error instanceof Error ? error.message : 'Unknown error'}`, { cause: error });
        }
    }

    /**
     * Start the MCP server by connecting it to a stdio transport.
     * The startup message is written to stderr via `console.error`.
     *
     * @returns {Promise<void>}
     */
    async start() {
        const transport = new StdioServerTransport();
        await this.server.connect(transport);
        // Server will run until the transport is closed
        console.error('RAG-lite TS MCP Server started successfully');
    }
}
/**
 * Entry point: construct the MCP server and connect it to its transport.
 *
 * Startup failures terminate the process. A ConfigurationError exits with the
 * error's own `exitCode`; anything else exits with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
    try {
        await new RagLiteMCPServer().start();
    }
    catch (startupError) {
        if (startupError instanceof ConfigurationError) {
            console.error('Configuration Error:', startupError.message);
            process.exit(startupError.exitCode);
            return;
        }
        const reason = startupError instanceof Error ? startupError.message : String(startupError);
        console.error('Failed to start MCP server:', reason);
        process.exit(1);
    }
}
// Handle process signals for graceful shutdown.
// Diagnostics go to stderr via console.error.
process.on('SIGINT', () => {
    console.error('Received SIGINT, shutting down gracefully...');
    process.exit(0);
});
process.on('SIGTERM', () => {
    console.error('Received SIGTERM, shutting down gracefully...');
    process.exit(0);
});
// Handle unhandled promise rejections
process.on('unhandledRejection', (reason, _promise) => {
    console.error('Unhandled Promise Rejection:', reason);
    process.exit(1);
});
// Handle uncaught exceptions. Any value can be thrown (including null or a
// string), so do not assume an Error instance when reading the message.
process.on('uncaughtException', (error) => {
    console.error('Uncaught Exception:', error instanceof Error ? error.message : String(error));
    process.exit(1);
});
// Start the server
main().catch((error) => {
    console.error('Fatal error:', error instanceof Error ? error.message : String(error));
    process.exit(1);
});
680
+ //# sourceMappingURL=mcp-server.js.map