rag-lite-ts 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +240 -0
- package/dist/api-errors.d.ts +90 -0
- package/dist/api-errors.d.ts.map +1 -0
- package/dist/api-errors.js +320 -0
- package/dist/api-errors.js.map +1 -0
- package/dist/chunker.d.ts +47 -0
- package/dist/chunker.d.ts.map +1 -0
- package/dist/chunker.js +256 -0
- package/dist/chunker.js.map +1 -0
- package/dist/cli/indexer.d.ts +11 -0
- package/dist/cli/indexer.d.ts.map +1 -0
- package/dist/cli/indexer.js +272 -0
- package/dist/cli/indexer.js.map +1 -0
- package/dist/cli/search.d.ts +7 -0
- package/dist/cli/search.d.ts.map +1 -0
- package/dist/cli/search.js +206 -0
- package/dist/cli/search.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +362 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +90 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +281 -0
- package/dist/config.js.map +1 -0
- package/dist/db.d.ts +90 -0
- package/dist/db.d.ts.map +1 -0
- package/dist/db.js +340 -0
- package/dist/db.js.map +1 -0
- package/dist/embedder.d.ts +101 -0
- package/dist/embedder.d.ts.map +1 -0
- package/dist/embedder.js +323 -0
- package/dist/embedder.js.map +1 -0
- package/dist/error-handler.d.ts +91 -0
- package/dist/error-handler.d.ts.map +1 -0
- package/dist/error-handler.js +196 -0
- package/dist/error-handler.js.map +1 -0
- package/dist/file-processor.d.ts +59 -0
- package/dist/file-processor.d.ts.map +1 -0
- package/dist/file-processor.js +312 -0
- package/dist/file-processor.js.map +1 -0
- package/dist/index-manager.d.ts +99 -0
- package/dist/index-manager.d.ts.map +1 -0
- package/dist/index-manager.js +444 -0
- package/dist/index-manager.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +21 -0
- package/dist/index.js.map +1 -0
- package/dist/indexer.d.ts +7 -0
- package/dist/indexer.d.ts.map +1 -0
- package/dist/indexer.js +51 -0
- package/dist/indexer.js.map +1 -0
- package/dist/ingestion.d.ts +175 -0
- package/dist/ingestion.d.ts.map +1 -0
- package/dist/ingestion.js +705 -0
- package/dist/ingestion.js.map +1 -0
- package/dist/mcp-server.d.ts +14 -0
- package/dist/mcp-server.d.ts.map +1 -0
- package/dist/mcp-server.js +680 -0
- package/dist/mcp-server.js.map +1 -0
- package/dist/path-manager.d.ts +42 -0
- package/dist/path-manager.d.ts.map +1 -0
- package/dist/path-manager.js +66 -0
- package/dist/path-manager.js.map +1 -0
- package/dist/preprocess.d.ts +19 -0
- package/dist/preprocess.d.ts.map +1 -0
- package/dist/preprocess.js +203 -0
- package/dist/preprocess.js.map +1 -0
- package/dist/preprocessors/index.d.ts +17 -0
- package/dist/preprocessors/index.d.ts.map +1 -0
- package/dist/preprocessors/index.js +38 -0
- package/dist/preprocessors/index.js.map +1 -0
- package/dist/preprocessors/mdx.d.ts +25 -0
- package/dist/preprocessors/mdx.d.ts.map +1 -0
- package/dist/preprocessors/mdx.js +101 -0
- package/dist/preprocessors/mdx.js.map +1 -0
- package/dist/preprocessors/mermaid.d.ts +68 -0
- package/dist/preprocessors/mermaid.d.ts.map +1 -0
- package/dist/preprocessors/mermaid.js +329 -0
- package/dist/preprocessors/mermaid.js.map +1 -0
- package/dist/preprocessors/registry.d.ts +56 -0
- package/dist/preprocessors/registry.d.ts.map +1 -0
- package/dist/preprocessors/registry.js +179 -0
- package/dist/preprocessors/registry.js.map +1 -0
- package/dist/reranker.d.ts +40 -0
- package/dist/reranker.d.ts.map +1 -0
- package/dist/reranker.js +212 -0
- package/dist/reranker.js.map +1 -0
- package/dist/resource-manager-demo.d.ts +7 -0
- package/dist/resource-manager-demo.d.ts.map +1 -0
- package/dist/resource-manager-demo.js +52 -0
- package/dist/resource-manager-demo.js.map +1 -0
- package/dist/resource-manager.d.ts +129 -0
- package/dist/resource-manager.d.ts.map +1 -0
- package/dist/resource-manager.js +389 -0
- package/dist/resource-manager.js.map +1 -0
- package/dist/search-standalone.d.ts +7 -0
- package/dist/search-standalone.d.ts.map +1 -0
- package/dist/search-standalone.js +117 -0
- package/dist/search-standalone.js.map +1 -0
- package/dist/search.d.ts +92 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/search.js +454 -0
- package/dist/search.js.map +1 -0
- package/dist/test-utils.d.ts +36 -0
- package/dist/test-utils.d.ts.map +1 -0
- package/dist/test-utils.js +27 -0
- package/dist/test-utils.js.map +1 -0
- package/dist/tokenizer.d.ts +21 -0
- package/dist/tokenizer.d.ts.map +1 -0
- package/dist/tokenizer.js +59 -0
- package/dist/tokenizer.js.map +1 -0
- package/dist/types.d.ts +44 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/vector-index.d.ts +64 -0
- package/dist/vector-index.d.ts.map +1 -0
- package/dist/vector-index.js +308 -0
- package/dist/vector-index.js.map +1 -0
- package/package.json +80 -0
|
@@ -0,0 +1,680 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* MCP server entry point for rag-lite-ts
|
|
4
|
+
*
|
|
5
|
+
* This is a thin wrapper around existing search and indexing functions
|
|
6
|
+
* that exposes them as MCP tools without creating REST/GraphQL endpoints.
|
|
7
|
+
*
|
|
8
|
+
* The MCP server lives in the same package as CLI with dual entry points
|
|
9
|
+
* and provides proper MCP tool definitions for search and indexing capabilities.
|
|
10
|
+
*
|
|
11
|
+
* Requirements addressed: 6.2, 6.4, 6.5, 6.6
|
|
12
|
+
*/
|
|
13
|
+
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
14
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
15
|
+
import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
|
|
16
|
+
import { existsSync, statSync } from 'fs';
|
|
17
|
+
import { resolve } from 'path';
|
|
18
|
+
import { SearchEngine } from './search.js';
|
|
19
|
+
import { IngestionPipeline, rebuildIndex } from './ingestion.js';
|
|
20
|
+
import { initializeEmbeddingEngine } from './embedder.js';
|
|
21
|
+
import { openDatabase } from './db.js';
|
|
22
|
+
import { config, validateConfig, ConfigurationError } from './config.js';
|
|
23
|
+
/**
 * MCP server that exposes the rag-lite-ts search and indexing functions as MCP tools.
 *
 * The class registers four tools (`search`, `ingest`, `rebuild_index`, `get_stats`) on an
 * MCP `Server` and answers them over a stdio transport. It does not create REST/GraphQL
 * endpoints. The search engine is created lazily on the first `search` call and discarded
 * whenever the index is changed by `ingest` or `rebuild_index`.
 *
 * Every failure reported to the client is returned as a tool result flagged with
 * `isError: true`, so clients can distinguish a failed call from a successful one.
 */
class RagLiteMCPServer {
    /** Embedding models accepted by the `ingest` tool (schema enum and runtime validation). */
    static #SUPPORTED_MODELS = ['sentence-transformers/all-MiniLM-L6-v2', 'Xenova/all-mpnet-base-v2'];
    /** File extensions accepted when `ingest` is pointed at a single file. */
    static #SUPPORTED_EXTENSIONS = ['.md', '.txt'];
    /** Warning attached to every model/dimension compatibility error payload. */
    static #REBUILD_WARNING = 'Rebuilding will regenerate all embeddings and may take significant time';
    server;
    searchEngine = null;
    isSearchEngineInitialized = false;
    /**
     * Create the underlying MCP `Server` (tools capability only) and register the tool handlers.
     */
    constructor() {
        this.server = new Server({
            name: 'rag-lite-ts',
            version: '1.0.0',
        }, {
            capabilities: {
                tools: {},
            },
        });
        this.setupToolHandlers();
    }
    /**
     * Wrap a JSON-serializable payload in an MCP tool result with a single text item.
     *
     * @param {unknown} payload - Value serialized with two-space indentation.
     * @param {boolean} [isError=false] - When true the result is flagged with `isError: true`.
     * @returns {{content: Array<{type: string, text: string}>, isError?: boolean}}
     */
    static #jsonResult(payload, isError = false) {
        const result = {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify(payload, null, 2),
                },
            ],
        };
        if (isError) {
            result.isError = true;
        }
        return result;
    }
    /**
     * Translate a model/dimension compatibility error into a structured tool result.
     *
     * Detection is based on substrings of the error message ('Model mismatch detected',
     * 'dimension mismatch', and for ingestion 'Failed to initialize' + 'Model mismatch').
     *
     * @param {unknown} error - The caught value.
     * @param {'search' | 'ingestion'} operation - Operation name used in the message; ingestion
     *   additionally advertises the `force_rebuild` option and the INITIALIZATION_FAILED case.
     * @returns {object | null} An error tool result, or `null` when the error is not a
     *   compatibility error and should be re-thrown by the caller.
     */
    static #compatibilityErrorResult(error, operation) {
        if (!(error instanceof Error)) {
            return null;
        }
        const details = error.message;
        const duringIngestion = operation === 'ingestion';
        const forceRebuildHint = duringIngestion
            ? ['Use force_rebuild: true parameter if you want to rebuild during ingestion']
            : [];
        let summary = null;
        if (details.includes('Model mismatch detected')) {
            summary = {
                error: 'MODEL_MISMATCH',
                message: `Cannot perform ${operation} due to model mismatch`,
                explanation: 'The embedding model configuration does not match the indexed data. Please verify your setup before proceeding.',
                options: [
                    'Check if the model mismatch is intentional',
                    'If you want to use a different model, manually run the rebuild_index tool',
                    ...forceRebuildHint,
                    'Verify your model configuration matches your indexing setup',
                ],
            };
        }
        else if (details.includes('dimension mismatch')) {
            summary = {
                error: 'DIMENSION_MISMATCH',
                message: `Cannot perform ${operation} due to vector dimension mismatch`,
                explanation: 'The vector dimensions do not match between the current model and the indexed data. Please verify your setup before proceeding.',
                options: [
                    'Check your model configuration',
                    'If you want to change models, manually run the rebuild_index tool',
                    ...forceRebuildHint,
                    'Ensure consistency between indexing and search models',
                ],
            };
        }
        else if (duringIngestion && details.includes('Failed to initialize') && details.includes('Model mismatch')) {
            // 'dimension mismatch' needs no check here: it was already handled by the branch above
            summary = {
                error: 'INITIALIZATION_FAILED',
                message: 'Cannot initialize ingestion due to model compatibility issues',
                explanation: 'The system cannot initialize due to model compatibility issues. Please verify your setup before proceeding.',
                options: [
                    'Check your model configuration',
                    'If you want to change models, manually run the rebuild_index tool',
                    'Verify consistency between your indexing and search setup',
                ],
            };
        }
        if (summary === null) {
            return null;
        }
        return RagLiteMCPServer.#jsonResult({
            error: summary.error,
            message: summary.message,
            details,
            resolution: {
                action: 'manual_intervention_required',
                explanation: summary.explanation,
                options: summary.options,
                warning: RagLiteMCPServer.#REBUILD_WARNING,
            },
        }, true);
    }
    /**
     * Register the `tools/list` and `tools/call` request handlers on the MCP server.
     *
     * `tools/call` dispatches by tool name; any error thrown by a handler is converted into a
     * text result of the form `Error: <message>` flagged with `isError: true`.
     */
    setupToolHandlers() {
        // List available tools
        this.server.setRequestHandler(ListToolsRequestSchema, async () => {
            return {
                tools: [
                    {
                        name: 'search',
                        description: 'Search indexed documents using semantic similarity. Returns relevant document chunks with scores and metadata.',
                        inputSchema: {
                            type: 'object',
                            properties: {
                                query: {
                                    type: 'string',
                                    description: 'Search query string to find relevant documents',
                                    minLength: 1,
                                    maxLength: 500
                                },
                                top_k: {
                                    type: 'number',
                                    description: 'Number of results to return (default: 10, max: 100)',
                                    minimum: 1,
                                    maximum: 100,
                                    default: 10
                                },
                                rerank: {
                                    type: 'boolean',
                                    description: 'Enable reranking for better result quality (default: false)',
                                    default: false
                                }
                            },
                            required: ['query'],
                            additionalProperties: false
                        }
                    },
                    {
                        name: 'ingest',
                        description: 'Ingest documents from a file or directory path. Processes .md and .txt files, chunks them, generates embeddings, and stores in the search index.',
                        inputSchema: {
                            type: 'object',
                            properties: {
                                path: {
                                    type: 'string',
                                    description: 'File or directory path to ingest. Can be a single .md/.txt file or directory containing such files.'
                                },
                                model: {
                                    type: 'string',
                                    description: 'Embedding model to use (default: sentence-transformers/all-MiniLM-L6-v2). Options: sentence-transformers/all-MiniLM-L6-v2, Xenova/all-mpnet-base-v2',
                                    enum: [...RagLiteMCPServer.#SUPPORTED_MODELS]
                                },
                                force_rebuild: {
                                    type: 'boolean',
                                    description: 'Force rebuild of the entire index (default: false)',
                                    default: false
                                }
                            },
                            required: ['path'],
                            additionalProperties: false
                        }
                    },
                    {
                        name: 'rebuild_index',
                        description: 'Rebuild the entire vector index from scratch. Useful when model version changes or for maintenance. This will regenerate all embeddings.',
                        inputSchema: {
                            type: 'object',
                            properties: {},
                            additionalProperties: false
                        }
                    },
                    {
                        name: 'get_stats',
                        description: 'Get statistics about the current search index including number of documents, chunks, and index status.',
                        inputSchema: {
                            type: 'object',
                            properties: {},
                            additionalProperties: false
                        }
                    }
                ],
            };
        });
        // Handle tool calls
        this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
            const { name } = request.params;
            // `arguments` may be omitted by the client; hand the handlers an empty object so
            // their own validation produces the error message instead of a TypeError.
            const args = request.params.arguments ?? {};
            try {
                switch (name) {
                    case 'search':
                        return await this.handleSearch(args);
                    case 'ingest':
                        return await this.handleIngest(args);
                    case 'rebuild_index':
                        return await this.handleRebuildIndex(args);
                    case 'get_stats':
                        return await this.handleGetStats(args);
                    default:
                        throw new Error(`Unknown tool: ${name}`);
                }
            }
            catch (error) {
                const errorMessage = error instanceof Error ? error.message : String(error);
                return {
                    content: [
                        {
                            type: 'text',
                            text: `Error: ${errorMessage}`,
                        },
                    ],
                    // Mark the result as a failed tool execution so clients do not treat it as data
                    isError: true,
                };
            }
        });
    }
    /**
     * Handle the `search` tool: validate the arguments, lazily initialize the search engine,
     * run the query and return the ranked results as pretty-printed JSON.
     *
     * @param {{query?: string, top_k?: number, rerank?: boolean}} [args] - Tool arguments.
     * @returns {Promise<object>} Tool result with the search results, or a structured
     *   compatibility error result (flagged `isError`) for model/dimension mismatches.
     * @throws {Error} On invalid arguments, a missing database/index file, or any other failure.
     */
    async handleSearch(args) {
        const { query, top_k: topK, rerank } = args ?? {};
        try {
            // Validate arguments
            if (!query || typeof query !== 'string') {
                throw new Error('Query parameter is required and must be a string');
            }
            if (query.trim().length === 0) {
                throw new Error('Query cannot be empty');
            }
            if (query.length > 500) {
                throw new Error('Query is too long (maximum 500 characters)');
            }
            // Validate optional parameters
            if (topK !== undefined && (typeof topK !== 'number' || topK < 1 || topK > 100)) {
                throw new Error('top_k must be a number between 1 and 100');
            }
            if (rerank !== undefined && typeof rerank !== 'boolean') {
                throw new Error('rerank must be a boolean');
            }
            // Check if database and index exist
            if (!existsSync(config.db_file)) {
                throw new Error('No database found. You need to ingest documents first using the ingest tool.');
            }
            if (!existsSync(config.index_file)) {
                throw new Error('No vector index found. The ingestion may not have completed successfully. Try using the ingest tool or rebuild_index tool.');
            }
            // Initialize search engine if needed
            if (!this.isSearchEngineInitialized) {
                await this.initializeSearchEngine();
            }
            const searchOptions = {
                top_k: topK || config.top_k || 10,
                // rerank is either a validated boolean or undefined at this point
                rerank: rerank ?? config.rerank_enabled
            };
            // Time only the search call itself
            const startTime = Date.now();
            const results = await this.searchEngine.search(query, searchOptions);
            const searchTime = Date.now() - startTime;
            return RagLiteMCPServer.#jsonResult({
                query,
                results_count: results.length,
                search_time_ms: searchTime,
                results: results.map((result, index) => ({
                    rank: index + 1,
                    score: Math.round(result.score * 100) / 100, // Round to 2 decimal places
                    document: {
                        id: result.document.id,
                        title: result.document.title,
                        source: result.document.source
                    },
                    text: result.text
                }))
            });
        }
        catch (error) {
            const compatibilityResult = RagLiteMCPServer.#compatibilityErrorResult(error, 'search');
            if (compatibilityResult) {
                return compatibilityResult;
            }
            // Re-throw other errors to be handled by the main error handler
            throw error;
        }
    }
    /**
     * Handle the `ingest` tool: validate the path and model, run an `IngestionPipeline` over
     * the path and return an ingestion summary as pretty-printed JSON.
     *
     * The cached search engine is dropped after a successful ingestion because the index may
     * have changed; the next `search` call re-initializes it.
     *
     * @param {{path?: string, model?: string, force_rebuild?: boolean}} [args] - Tool arguments.
     * @returns {Promise<object>} Tool result with the summary, or a structured compatibility
     *   error result (flagged `isError`) for model/dimension mismatches.
     * @throws {Error} On invalid arguments, an inaccessible path, or any other failure.
     */
    async handleIngest(args) {
        const { path: inputPath, model, force_rebuild: forceRebuild } = args ?? {};
        try {
            // Validate arguments
            if (!inputPath || typeof inputPath !== 'string') {
                throw new Error('Path parameter is required and must be a string');
            }
            // Validate path exists
            const resolvedPath = resolve(inputPath);
            if (!existsSync(resolvedPath)) {
                throw new Error(`Path does not exist: ${inputPath}`);
            }
            // Check if it's a file or directory and validate
            let stats;
            try {
                stats = statSync(resolvedPath);
            }
            catch (error) {
                throw new Error(`Cannot access path: ${inputPath}. Check permissions.`, { cause: error });
            }
            // Validate file type for single files; test the resolved path so a trailing
            // separator in the user-supplied path cannot defeat the extension check.
            if (stats.isFile()) {
                const normalizedPath = resolvedPath.toLowerCase();
                const hasValidExtension = RagLiteMCPServer.#SUPPORTED_EXTENSIONS.some((ext) => normalizedPath.endsWith(ext));
                if (!hasValidExtension) {
                    throw new Error(`Unsupported file type: ${inputPath}. Supported types: .md, .txt`);
                }
            }
            // Validate model parameter if provided
            if (model && !RagLiteMCPServer.#SUPPORTED_MODELS.includes(model)) {
                throw new Error(`Unsupported model: ${model}. Supported models: ${RagLiteMCPServer.#SUPPORTED_MODELS.join(', ')}`);
            }
            // Create config overrides if model is specified
            const configOverrides = model ? { embedding_model: model } : {};
            // Create and run ingestion pipeline using existing functionality
            const pipeline = new IngestionPipeline();
            pipeline.setConfigOverrides(configOverrides);
            try {
                const result = await pipeline.ingestPath(resolvedPath, {
                    forceRebuild: forceRebuild || false
                });
                // Reset search engine initialization flag since index may have changed
                // NOTE(review): the previous engine is dropped without releasing whatever it
                // holds (e.g. its database handle) — confirm whether SearchEngine offers a cleanup hook.
                this.isSearchEngineInitialized = false;
                this.searchEngine = null;
                const elapsedMs = result.processingTimeMs;
                return RagLiteMCPServer.#jsonResult({
                    path: resolvedPath,
                    path_type: stats.isDirectory() ? 'directory' : 'file',
                    documents_processed: result.documentsProcessed,
                    chunks_created: result.chunksCreated,
                    embeddings_generated: result.embeddingsGenerated,
                    document_errors: result.documentErrors,
                    embedding_errors: result.embeddingErrors,
                    processing_time_ms: elapsedMs,
                    processing_time_seconds: Math.round(elapsedMs / 1000 * 100) / 100,
                    // Guard against division by zero for instantaneous runs
                    chunks_per_second: elapsedMs > 0 ?
                        Math.round(result.chunksCreated / (elapsedMs / 1000) * 100) / 100 : 0,
                    success: true
                });
            }
            finally {
                // Always release pipeline resources, whether ingestion succeeded or failed
                await pipeline.cleanup();
            }
        }
        catch (error) {
            const compatibilityResult = RagLiteMCPServer.#compatibilityErrorResult(error, 'ingestion');
            if (compatibilityResult) {
                return compatibilityResult;
            }
            // Re-throw other errors to be handled by the main error handler
            throw error;
        }
    }
    /**
     * Handle the `rebuild_index` tool: rebuild the vector index via `rebuildIndex()` and drop
     * the cached search engine so the next search loads the new index.
     *
     * @param {object} [_args] - Unused; the tool takes no arguments.
     * @returns {Promise<object>} Tool result with a success summary.
     * @throws {Error} `Index rebuild failed: <reason>` with the original error as `cause`.
     */
    async handleRebuildIndex(_args) {
        try {
            // Use existing rebuild functionality
            await rebuildIndex();
            // Reset search engine initialization flag since index was rebuilt
            this.isSearchEngineInitialized = false;
            this.searchEngine = null;
            return RagLiteMCPServer.#jsonResult({
                operation: 'rebuild_index',
                success: true,
                message: 'Vector index has been successfully rebuilt. All embeddings have been regenerated with the current model.'
            });
        }
        catch (error) {
            throw new Error(`Index rebuild failed: ${error instanceof Error ? error.message : 'Unknown error'}`, { cause: error });
        }
    }
    /**
     * Handle the `get_stats` tool: report file existence, configured vs. stored model
     * compatibility, document/chunk counts, engine statistics (when the engine is
     * initialized) and the effective configuration.
     *
     * Database problems are reported inside the payload (`database_error`) rather than thrown.
     *
     * @param {object} [_args] - Unused; the tool takes no arguments.
     * @returns {Promise<object>} Tool result with the statistics as pretty-printed JSON.
     * @throws {Error} `Failed to get stats: <reason>` with the original error as `cause`.
     */
    async handleGetStats(_args) {
        try {
            const stats = {
                database_exists: existsSync(config.db_file),
                index_exists: existsSync(config.index_file),
                search_engine_initialized: this.isSearchEngineInitialized
            };
            // Get model information and compatibility status
            const { getModelDefaults } = await import('./config.js');
            const { getStoredModelInfo } = await import('./db.js');
            const currentModel = config.embedding_model;
            const currentDefaults = getModelDefaults(currentModel);
            const modelInfo = {
                current_model: currentModel,
                current_dimensions: currentDefaults.dimensions,
                model_specific_config: {
                    chunk_size: currentDefaults.chunk_size,
                    chunk_overlap: currentDefaults.chunk_overlap,
                    batch_size: currentDefaults.batch_size
                }
            };
            stats.model_info = modelInfo;
            if (!stats.database_exists) {
                // No database exists - indicate this is a fresh setup
                modelInfo.compatibility = {
                    status: 'No database exists - fresh setup, no compatibility issues'
                };
            }
            else {
                try {
                    const db = await openDatabase(config.db_file);
                    try {
                        const storedModel = await getStoredModelInfo(db);
                        if (storedModel) {
                            modelInfo.stored_model = storedModel.modelName;
                            modelInfo.stored_dimensions = storedModel.dimensions;
                            // Check for compatibility issues
                            const modelMatch = storedModel.modelName === currentModel;
                            const dimensionMatch = storedModel.dimensions === currentDefaults.dimensions;
                            modelInfo.compatibility = {
                                model_matches: modelMatch,
                                dimensions_match: dimensionMatch,
                                compatible: modelMatch && dimensionMatch
                            };
                            if (!modelInfo.compatibility.compatible) {
                                modelInfo.compatibility.issue = 'Model mismatch detected - rebuild required';
                                // NOTE(review): this hint names an npm script while the other
                                // messages in this class point at the rebuild_index tool — confirm wording.
                                modelInfo.compatibility.resolution = 'Run "npm run rebuild" to rebuild the index with the new model';
                            }
                        }
                        else {
                            modelInfo.compatibility = {
                                status: 'No stored model info - first run or needs rebuild'
                            };
                        }
                        // Get basic database stats
                        const docCount = await db.get('SELECT COUNT(*) as count FROM documents');
                        const chunkCount = await db.get('SELECT COUNT(*) as count FROM chunks');
                        stats.total_documents = docCount?.count || 0;
                        stats.total_chunks = chunkCount?.count || 0;
                    }
                    finally {
                        await db.close();
                    }
                }
                catch (error) {
                    // A broken database is reported in the payload instead of failing the call
                    const reason = error instanceof Error ? error.message : 'Unknown error';
                    stats.database_error = reason;
                    modelInfo.compatibility = {
                        status: 'Error checking model compatibility',
                        error: reason
                    };
                }
            }
            // If search engine is initialized, get detailed stats
            if (this.isSearchEngineInitialized && this.searchEngine) {
                const searchStats = await this.searchEngine.getStats();
                stats.total_chunks = searchStats.totalChunks;
                stats.index_size = searchStats.indexSize;
                stats.reranking_enabled = searchStats.rerankingEnabled;
            }
            // Show effective configuration (with model-specific defaults applied)
            stats.config = {
                db_file: config.db_file,
                index_file: config.index_file,
                embedding_model: config.embedding_model,
                chunk_size: currentDefaults.chunk_size, // Use model-specific default
                chunk_overlap: currentDefaults.chunk_overlap, // Use model-specific default
                batch_size: currentDefaults.batch_size, // Use model-specific default
                top_k: config.top_k,
                rerank_enabled: config.rerank_enabled
            };
            return RagLiteMCPServer.#jsonResult(stats);
        }
        catch (error) {
            throw new Error(`Failed to get stats: ${error instanceof Error ? error.message : 'Unknown error'}`, { cause: error });
        }
    }
    /**
     * Lazily build the search engine from the model information stored in the database.
     *
     * The stored model name and dimensions (not `config.embedding_model`) drive the embedder
     * and index manager. If any step fails after the database was opened, the database
     * handle is closed again and no engine is left on the instance.
     *
     * @returns {Promise<void>}
     * @throws {Error} The original error for model/dimension mismatches; otherwise
     *   `Failed to initialize search engine: <reason>` with the original error as `cause`.
     */
    async initializeSearchEngine() {
        if (this.isSearchEngineInitialized) {
            return;
        }
        let db = null;
        try {
            // Validate configuration
            validateConfig(config);
            // Open database connection
            db = await openDatabase(config.db_file);
            // Read stored model info from database
            const { getStoredModelInfo } = await import('./db.js');
            const storedModelInfo = await getStoredModelInfo(db);
            if (!storedModelInfo) {
                throw new Error('No model information found in database. The database may be from an older version or corrupted. Try running ingestion again.');
            }
            // Use the stored model info instead of config.embedding_model
            const { getModelDefaults } = await import('./config.js');
            const modelDefaults = getModelDefaults(storedModelInfo.modelName);
            const embedder = await initializeEmbeddingEngine(storedModelInfo.modelName, modelDefaults.batch_size);
            // Initialize index manager with stored model info
            const { IndexManager } = await import('./index-manager.js');
            const indexManager = new IndexManager(config.index_file, config.db_file, storedModelInfo.dimensions, storedModelInfo.modelName);
            await indexManager.initialize();
            // Publish the engine only once it is fully initialized
            const engine = SearchEngine.createWithComponents(embedder, indexManager, db, config.rerank_enabled);
            await engine.initialize();
            this.searchEngine = engine;
            this.isSearchEngineInitialized = true;
        }
        catch (error) {
            if (db) {
                try {
                    await db.close();
                }
                catch {
                    // Best effort: the initialization failure below is the error worth reporting
                }
            }
            // Model/dimension mismatch errors are re-thrown untouched so callers can detect
            // them by message and build a structured response
            if (error instanceof Error && (error.message.includes('Model mismatch detected') || error.message.includes('dimension mismatch'))) {
                throw error;
            }
            // For other initialization errors, provide a generic wrapper
            throw new Error(`Failed to initialize search engine: ${error instanceof Error ? error.message : 'Unknown error'}`, { cause: error });
        }
    }
    /**
     * Connect the MCP server to a stdio transport.
     *
     * The start-up message goes to stderr (`console.error`); stdout is not written to here.
     *
     * @returns {Promise<void>}
     */
    async start() {
        const transport = new StdioServerTransport();
        await this.server.connect(transport);
        // Server will run until the transport is closed
        console.error('RAG-lite TS MCP Server started successfully');
    }
}
|
|
636
|
+
/**
 * Entry point of the MCP server process.
 *
 * Builds a RagLiteMCPServer and starts it. A ConfigurationError is reported on stderr and
 * terminates the process with the error's own exit code; every other failure is reported
 * and terminates the process with exit code 1.
 */
async function main() {
    try {
        await new RagLiteMCPServer().start();
    }
    catch (error) {
        if (!(error instanceof ConfigurationError)) {
            const reason = error instanceof Error ? error.message : String(error);
            console.error('Failed to start MCP server:', reason);
            process.exit(1);
        }
        else {
            console.error('Configuration Error:', error.message);
            process.exit(error.exitCode);
        }
    }
}
|
|
656
|
+
// Termination signals: log to stderr and exit with status 0
for (const signal of ['SIGINT', 'SIGTERM']) {
    process.on(signal, () => {
        console.error(`Received ${signal}, shutting down gracefully...`);
        process.exit(0);
    });
}
// A rejected promise nobody handled is fatal: log the reason and exit with status 1
process.on('unhandledRejection', (reason) => {
    console.error('Unhandled Promise Rejection:', reason);
    process.exit(1);
});
// An exception that escaped every handler is fatal as well
process.on('uncaughtException', (error) => {
    console.error('Uncaught Exception:', error.message);
    process.exit(1);
});
// Start the server; the catch is a last line of defence for failures main() did not handle itself
main().catch((error) => {
    const reason = error instanceof Error ? error.message : String(error);
    console.error('Fatal error:', reason);
    process.exit(1);
});
|
|
680
|
+
//# sourceMappingURL=mcp-server.js.map
|