mcp-code-indexer 3.1.4-py3-none-any.whl → 3.1.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_code_indexer/__init__.py +8 -6
- mcp_code_indexer/ask_handler.py +105 -75
- mcp_code_indexer/claude_api_handler.py +125 -82
- mcp_code_indexer/cleanup_manager.py +107 -81
- mcp_code_indexer/database/connection_health.py +212 -161
- mcp_code_indexer/database/database.py +529 -415
- mcp_code_indexer/database/exceptions.py +167 -118
- mcp_code_indexer/database/models.py +54 -19
- mcp_code_indexer/database/retry_executor.py +139 -103
- mcp_code_indexer/deepask_handler.py +178 -140
- mcp_code_indexer/error_handler.py +88 -76
- mcp_code_indexer/file_scanner.py +163 -141
- mcp_code_indexer/git_hook_handler.py +352 -261
- mcp_code_indexer/logging_config.py +76 -94
- mcp_code_indexer/main.py +406 -320
- mcp_code_indexer/middleware/error_middleware.py +106 -71
- mcp_code_indexer/query_preprocessor.py +40 -40
- mcp_code_indexer/server/mcp_server.py +785 -470
- mcp_code_indexer/token_counter.py +54 -47
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/METADATA +3 -3
- mcp_code_indexer-3.1.5.dist-info/RECORD +37 -0
- mcp_code_indexer-3.1.4.dist-info/RECORD +0 -37
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/WHEEL +0 -0
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/entry_points.txt +0 -0
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/licenses/LICENSE +0 -0
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/top_level.txt +0 -0
mcp_code_indexer/server/mcp_server.py

@@ -6,7 +6,6 @@ for file description management tools.
 """
 
 import asyncio
-import hashlib
 import html
 import json
 import logging
@@ -14,7 +13,7 @@ import re
 import uuid
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 from mcp import types
 from mcp.server import Server
@@ -25,12 +24,15 @@ from mcp_code_indexer.database.database import DatabaseManager
 from mcp_code_indexer.file_scanner import FileScanner
 from mcp_code_indexer.token_counter import TokenCounter
 from mcp_code_indexer.database.models import (
-    Project,
-    FileDescription,
-    ProjectOverview
+    Project,
+    FileDescription,
+    ProjectOverview,
+)
+from mcp_code_indexer.error_handler import setup_error_handling
+from mcp_code_indexer.middleware.error_middleware import (
+    create_tool_middleware,
+    AsyncTaskManager,
 )
-from mcp_code_indexer.error_handler import setup_error_handling, ErrorHandler
-from mcp_code_indexer.middleware.error_middleware import create_tool_middleware, AsyncTaskManager
 from mcp_code_indexer.logging_config import get_logger
 
 
@@ -40,11 +42,11 @@ logger = logging.getLogger(__name__)
 class MCPCodeIndexServer:
     """
     MCP Code Index Server.
-    
+
     Provides file description tracking and codebase navigation tools
     through the Model Context Protocol.
     """
-    
+
     def __init__(
         self,
         token_limit: int = 32000,
@@ -57,11 +59,11 @@ class MCPCodeIndexServer:
         health_check_interval: float = 30.0,
         retry_min_wait: float = 0.1,
         retry_max_wait: float = 2.0,
-        retry_jitter: float = 0.2
+        retry_jitter: float = 0.2,
     ):
         """
         Initialize the MCP Code Index Server.
-        
+
         Args:
             token_limit: Maximum tokens before recommending search over overview
             db_path: Path to SQLite database
@@ -78,7 +80,7 @@ class MCPCodeIndexServer:
         self.token_limit = token_limit
         self.db_path = db_path or Path.home() / ".mcp-code-index" / "tracker.db"
         self.cache_dir = cache_dir or Path.home() / ".mcp-code-index" / "cache"
-        
+
         # Store database configuration
         self.db_config = {
             "pool_size": db_pool_size,
@@ -88,12 +90,12 @@ class MCPCodeIndexServer:
             "health_check_interval": health_check_interval,
             "retry_min_wait": retry_min_wait,
             "retry_max_wait": retry_max_wait,
-            "retry_jitter": retry_jitter
+            "retry_jitter": retry_jitter,
         }
-        
+
         # Initialize components
         self.db_manager = DatabaseManager(
-            db_path=self.db_path,
+            db_path=self.db_path,
             pool_size=db_pool_size,
             retry_count=db_retry_count,
             timeout=db_timeout,
@@ -101,58 +103,58 @@ class MCPCodeIndexServer:
             health_check_interval=health_check_interval,
             retry_min_wait=retry_min_wait,
             retry_max_wait=retry_max_wait,
-            retry_jitter=retry_jitter
+            retry_jitter=retry_jitter,
         )
         self.token_counter = TokenCounter(token_limit)
-        
+
         # Setup error handling
         self.logger = get_logger(__name__)
         self.error_handler = setup_error_handling(self.logger)
         self.middleware = create_tool_middleware(self.error_handler)
         self.task_manager = AsyncTaskManager(self.error_handler)
-        
+
         # Create MCP server
         self.server = Server("mcp-code-indexer")
-        
+
         # Register handlers
         self._register_handlers()
-        
+
         # Add debug logging for server events
         self.logger.debug("MCP server instance created and handlers registered")
-        
+
         self.logger.info(
-            "MCP Code Index Server initialized",
-            extra={"structured_data": {"initialization": {"token_limit": token_limit}}}
+            "MCP Code Index Server initialized",
+            extra={"structured_data": {"initialization": {"token_limit": token_limit}}},
         )
-        
+
     def _clean_html_entities(self, text: str) -> str:
         """
         Clean HTML entities from text to prevent encoding issues.
-        
+
         Args:
             text: Text that may contain HTML entities
-        
+
         Returns:
             Text with HTML entities decoded to proper characters
         """
         if not text:
             return text
-        
+
         # Decode HTML entities like &lt; &gt; &amp; etc.
         return html.unescape(text)
-        
+
     def _clean_arguments(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
         """
         Clean HTML entities from all text arguments.
-        
+
         Args:
             arguments: Dictionary of arguments to clean
-        
+
         Returns:
             Dictionary with HTML entities decoded in all string values
         """
         cleaned = {}
-        
+
         for key, value in arguments.items():
             if isinstance(value, str):
                 cleaned[key] = self._clean_html_entities(value)
@@ -168,19 +170,19 @@ class MCPCodeIndexServer:
             else:
                 # Pass through other types unchanged
                 cleaned[key] = value
-        
+
         return cleaned
-        
+
     def _parse_json_robust(self, json_str: str) -> Dict[str, Any]:
         """
         Parse JSON with automatic repair for common issues.
-        
+
         Args:
             json_str: JSON string that may have formatting issues
-        
+
         Returns:
             Parsed JSON dictionary
-        
+
         Raises:
             ValueError: If JSON cannot be parsed even after repair attempts
         """
@@ -189,264 +191,474 @@ class MCPCodeIndexServer:
             return json.loads(json_str)
         except json.JSONDecodeError as original_error:
             logger.warning(f"Initial JSON parse failed: {original_error}")
-            
+
             # Try to repair common issues
             repaired = json_str
-            
+
             # Fix 1: Quote unquoted URLs and paths
             # Look for patterns like: "key": http://... or "key": /path/...
             url_pattern = r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9+.-]*://[^\s,}]+|/[^\s,}]*)'
             repaired = re.sub(url_pattern, r'\1: "\2"', repaired)
-            
+
             # Fix 2: Quote unquoted boolean-like strings
-            # Look for: "key": true-ish-string or "key": false-ish-string
-            bool_pattern = r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9_-]*[a-zA-Z0-9])(?=\s*[,}])'
+            # Look for: "key": true-ish-string or "key": false-ish-string
+            bool_pattern = (
+                r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9_-]*[a-zA-Z0-9])(?=\s*[,}])'
+            )
             repaired = re.sub(bool_pattern, r'\1: "\2"', repaired)
-            
+
             # Fix 3: Remove trailing commas
-            repaired = re.sub(r',(\s*[}\]])', r'\1', repaired)
-            
+            repaired = re.sub(r",(\s*[}\]])", r"\1", repaired)
+
             # Fix 4: Ensure proper string quoting for common unquoted values
             # Handle cases like: "key": value (where value should be "value")
             unquoted_pattern = r'("[\w]+"):\s*([a-zA-Z0-9_-]+)(?=\s*[,}])'
             repaired = re.sub(unquoted_pattern, r'\1: "\2"', repaired)
-            
+
             try:
                 result = json.loads(repaired)
-                logger.info(f"Successfully repaired JSON. Original: {json_str[:100]}...")
+                logger.info(
+                    f"Successfully repaired JSON. Original: {json_str[:100]}..."
+                )
                 logger.info(f"Repaired: {repaired[:100]}...")
                 return result
             except json.JSONDecodeError as repair_error:
                 logger.error(f"JSON repair failed. Original: {json_str}")
                 logger.error(f"Repaired attempt: {repaired}")
-                raise ValueError(
-                    f"Could not parse JSON even after repair attempts. Original error: {original_error}, Repair error: {repair_error}")
+                raise ValueError(
+                    f"Could not parse JSON even after repair attempts. "
+                    f"Original error: {original_error}, Repair error: {repair_error}"
+                )
+
     async def initialize(self) -> None:
         """Initialize database and other resources."""
         await self.db_manager.initialize()
         logger.info("Server initialized successfully")
-        
+
     def _register_handlers(self) -> None:
         """Register MCP tool and resource handlers."""
-        
+
         @self.server.list_tools()
         async def list_tools() -> List[types.Tool]:
             """Return list of available tools."""
             return [
                 types.Tool(
                     name="get_file_description",
-                    description="Retrieves the stored description for a specific file in a codebase. Use this to quickly understand what a file contains without reading its full contents.",
+                    description=(
+                        "Retrieves the stored description for a specific file in a "
+                        "codebase. Use this to quickly understand what a file "
+                        "contains without reading its full contents."
+                    ),
                     inputSchema={
                         "type": "object",
                         "properties": {
                             "projectName": {
                                 "type": "string",
-                                "description": "The name of the project"
+                                "description": "The name of the project",
                             },
                             "folderPath": {
-                                "type": "string",
-                                "description": "Absolute path to the project folder on disk"
+                                "type": "string",
+                                "description": (
+                                    "Absolute path to the project folder on disk"
+                                ),
                             },
-
-
                             "filePath": {
                                 "type": "string",
-                                "description": "Relative path to the file from project root"
-                            }
+                                "description": (
+                                    "Relative path to the file from project root"
+                                ),
+                            },
                         },
                         "required": ["projectName", "folderPath", "filePath"],
-                        "additionalProperties": False
-                    }
+                        "additionalProperties": False,
+                    },
                 ),
                 types.Tool(
                     name="update_file_description",
-                    description="Creates or updates the description for a file. Use this after analyzing a file's contents to store a detailed summary.",
+                    description=(
+                        "Creates or updates the description for a file. Use this "
+                        "after analyzing a file's contents to store a detailed summary."
+                    ),
                     inputSchema={
                         "type": "object",
                         "properties": {
-                            "projectName": {"type": "string", "description": "The name of the project"},
-                            "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
-                            "filePath": {"type": "string", "description": "Relative path to the file from project root"},
-                            "description": {"type": "string", "description": "Detailed description of the file's contents"},
-                            "fileHash": {"type": "string", "description": "SHA-256 hash of the file contents (optional)"}
-
+                            "projectName": {
+                                "type": "string",
+                                "description": "The name of the project",
+                            },
+                            "folderPath": {
+                                "type": "string",
+                                "description": (
+                                    "Absolute path to the project folder on disk"
+                                ),
+                            },
+                            "filePath": {
+                                "type": "string",
+                                "description": (
+                                    "Relative path to the file from project root"
+                                ),
+                            },
+                            "description": {
+                                "type": "string",
+                                "description": (
+                                    "Detailed description of the file's contents"
+                                ),
+                            },
+                            "fileHash": {
+                                "type": "string",
+                                "description": (
+                                    "SHA-256 hash of the file contents (optional)"
+                                ),
+                            },
                         },
-                        "required": ["projectName", "folderPath", "filePath", "description"],
-                        "additionalProperties": False
-                    }
+                        "required": [
+                            "projectName",
+                            "folderPath",
+                            "filePath",
+                            "description",
+                        ],
+                        "additionalProperties": False,
+                    },
                 ),
                 types.Tool(
                     name="check_codebase_size",
-                    description="Checks the total token count of a codebase's file structure and descriptions. Returns whether the codebase is 'large' and recommends using search instead of the full overview.",
+                    description=(
+                        "Checks the total token count of a codebase's file structure "
+                        "and descriptions. Returns whether the codebase is 'large' "
+                        "and recommends using search instead of the full overview."
+                    ),
                     inputSchema={
                         "type": "object",
                         "properties": {
-                            "projectName": {"type": "string", "description": "The name of the project"},
-                            "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
-                            "tokenLimit": {"type": "integer", "description": "Optional token limit override (defaults to server configuration)"}
+                            "projectName": {
+                                "type": "string",
+                                "description": "The name of the project",
+                            },
+                            "folderPath": {
+                                "type": "string",
+                                "description": (
+                                    "Absolute path to the project folder on disk"
+                                ),
+                            },
+                            "tokenLimit": {
+                                "type": "integer",
+                                "description": (
+                                    "Optional token limit override "
+                                    "(defaults to server configuration)"
+                                ),
+                            },
                         },
                         "required": ["projectName", "folderPath"],
-                        "additionalProperties": False
-                    }
+                        "additionalProperties": False,
+                    },
                 ),
                 types.Tool(
                     name="find_missing_descriptions",
-                    description="Scans the project folder to find files that don't have descriptions yet. Use update_file_description to add descriptions for individual files.",
+                    description=(
+                        "Scans the project folder to find files that don't have "
+                        "descriptions yet. Use update_file_description to add "
+                        "descriptions for individual files."
+                    ),
                     inputSchema={
                         "type": "object",
                         "properties": {
-                            "projectName": {"type": "string", "description": "The name of the project"},
-                            "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
-                            "limit": {"type": "integer", "description": "Maximum number of missing files to return (optional)"}
+                            "projectName": {
+                                "type": "string",
+                                "description": "The name of the project",
+                            },
+                            "folderPath": {
+                                "type": "string",
+                                "description": (
+                                    "Absolute path to the project folder on disk"
+                                ),
+                            },
+                            "limit": {
+                                "type": "integer",
+                                "description": (
+                                    "Maximum number of missing files to return "
+                                    "(optional)"
+                                ),
+                            },
                         },
                         "required": ["projectName", "folderPath"],
-                        "additionalProperties": False
-                    }
+                        "additionalProperties": False,
+                    },
                 ),
                 types.Tool(
                     name="search_descriptions",
-                    description="Searches through all file descriptions in a project to find files related to specific functionality. Use this for large codebases instead of loading the entire structure. Always start with the fewest terms possible (1 to 3 words AT MOST); if the tool returns a lot of results (more than 20) or the results are not relevant, then narrow it down by increasing the number of search words one at a time and calling the tool again. Start VERY broad, then narrow the focus only if needed!",
+                    description=(
+                        "Searches through all file descriptions in a project to find "
+                        "files related to specific functionality. Use this for large "
+                        "codebases instead of loading the entire structure. Always "
+                        "start with the fewest terms possible (1 to 3 words AT MOST); "
+                        "if the tool returns a lot of results (more than 20) or the "
+                        "results are not relevant, then narrow it down by increasing "
+                        "the number of search words one at a time and calling the tool "
+                        "again. Start VERY broad, then narrow the focus only if needed!"
+                    ),
                     inputSchema={
                         "type": "object",
                         "properties": {
-                            "projectName": {"type": "string", "description": "The name of the project"},
-                            "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
-                            "query": {"type": "string", "description": "Search query (e.g., 'authentication middleware', 'database models')"},
-                            "maxResults": {"type": "integer", "default": 20, "description": "Maximum number of results to return"}
+                            "projectName": {
+                                "type": "string",
+                                "description": "The name of the project",
+                            },
+                            "folderPath": {
+                                "type": "string",
+                                "description": (
+                                    "Absolute path to the project folder on disk"
+                                ),
+                            },
+                            "query": {
+                                "type": "string",
+                                "description": (
+                                    "Search query (e.g., 'authentication middleware', "
+                                    "'database models')"
+                                ),
+                            },
+                            "maxResults": {
+                                "type": "integer",
+                                "default": 20,
+                                "description": "Maximum number of results to return",
+                            },
                         },
                         "required": ["projectName", "folderPath", "query"],
-                        "additionalProperties": False
-                    }
+                        "additionalProperties": False,
+                    },
                 ),
                 types.Tool(
                     name="get_all_descriptions",
-                    description="Returns the complete file-by-file structure of a codebase with individual descriptions for each file. For large codebases, consider using get_codebase_overview for a condensed summary instead.",
+                    description=(
+                        "Returns the complete file-by-file structure of a codebase "
+                        "with individual descriptions for each file. For large "
+                        "codebases, consider using get_codebase_overview for a "
+                        "condensed summary instead."
+                    ),
                     inputSchema={
                         "type": "object",
                         "properties": {
-                            "projectName": {"type": "string", "description": "The name of the project"},
-                            "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"}
+                            "projectName": {
+                                "type": "string",
+                                "description": "The name of the project",
+                            },
+                            "folderPath": {
+                                "type": "string",
+                                "description": (
+                                    "Absolute path to the project folder on disk"
+                                ),
+                            },
                         },
                         "required": ["projectName", "folderPath"],
-                        "additionalProperties": False
-                    }
+                        "additionalProperties": False,
+                    },
                 ),
                 types.Tool(
                     name="get_codebase_overview",
-                    description="Returns a condensed, interpretive overview of the entire codebase. This is a single comprehensive narrative that captures the architecture, key components, relationships, and design patterns. Unlike get_all_descriptions which lists every file, this provides a holistic view suitable for understanding the codebase's structure and purpose. If no overview exists, returns empty string.",
+                    description=(
+                        "Returns a condensed, interpretive overview of the entire "
+                        "codebase. This is a single comprehensive narrative that "
+                        "captures the architecture, key components, relationships, "
+                        "and design patterns. Unlike get_all_descriptions which "
+                        "lists every file, this provides a holistic view suitable "
+                        "for understanding the codebase's structure and purpose. "
+                        "If no overview exists, returns empty string."
+                    ),
                     inputSchema={
                         "type": "object",
                         "properties": {
-                            "projectName": {"type": "string", "description": "The name of the project"},
-                            "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"}
+                            "projectName": {
+                                "type": "string",
+                                "description": "The name of the project",
+                            },
+                            "folderPath": {
+                                "type": "string",
+                                "description": (
+                                    "Absolute path to the project folder on disk"
+                                ),
+                            },
                         },
                         "required": ["projectName", "folderPath"],
-                        "additionalProperties": False
-                    }
+                        "additionalProperties": False,
+                    },
                 ),
                 types.Tool(
                     name="update_codebase_overview",
-                    description="""Creates a concise codebase overview for AI agents. Focus on essential navigation and context in 3500-7000 words. Include: (1) One-paragraph system summary - what it does and its core purpose, (2) Directory tree with one-line descriptions for each major folder, (3) Key architectural patterns (e.g., MVC, microservices, event-driven) in 2-3 sentences, (4) Critical file locations (entry points, config, main business logic), (5) Essential conventions (naming, file organization, error handling), (6) Important gotchas or non-obvious connections. Keep it scannable and action-oriented.
-
-Example:
-
-````
-## System Summary
-E-commerce platform handling product catalog, orders, and payments with React frontend and Node.js API.
-
-## Directory Structure
-```
-src/
-├── api/ # REST endpoints (auth in auth.js, orders in orders/)
-├── models/ # Sequelize models (User, Product, Order)
-├── services/ # Stripe (payments/), SendGrid (email/)
-├── client/ # React app (components/, pages/, hooks/)
-└── shared/ # Types and constants used by both API and client
-```
-
-## Architecture
-RESTful API with JWT auth. React frontend calls API. Background jobs via Bull queue. PostgreSQL with Sequelize ORM.
-
-## Key Files
-- Entry: `src/index.js` (starts Express server)
-- Config: `src/config/` (env-specific settings)
-- Routes: `src/api/routes.js` (all endpoints defined here)
-- Auth: `src/middleware/auth.js` (JWT validation)
-
-## Conventions
-- Files named `[entity].service.js` handle business logic
-- All API routes return `{ success: boolean, data?: any, error?: string }`
-- Database migrations in `migrations/` - run before adding models
-
-## Important Notes
-- Payment webhooks MUST be idempotent (check `processedWebhooks` table)
-- User emails are case-insensitive (lowercase in DB)
-- Order status transitions enforced in `Order.beforeUpdate` hook
-````""",
+                    description=(
+                        "Creates a concise codebase overview for AI agents. Focus on "
+                        "essential navigation and context in 3500-7000 words. Include: "
+                        "(1) One-paragraph system summary - what it does and its core "
+                        "purpose, (2) Directory tree with one-line descriptions for "
+                        "each major folder, (3) Key architectural patterns (e.g., MVC, "
+                        "microservices, event-driven) in 2-3 sentences, (4) Critical "
+                        "file locations (entry points, config, main business logic), "
+                        "(5) Essential conventions (naming, file organization, error "
+                        "handling), (6) Important gotchas or non-obvious connections. "
+                        "Keep it scannable and action-oriented.\n\n"
+                        "Example:\n\n"
+                        "````\n"
+                        "## System Summary\n"
+                        "E-commerce platform handling product catalog, orders, "
+                        "and payments with React frontend and Node.js API.\n\n"
+                        "## Directory Structure\n"
+                        "```\n"
+                        "src/\n"
+                        "├── api/ # REST endpoints "
+                        "(auth in auth.js, orders in orders/)\n"
+                        "├── models/ # Sequelize models "
+                        "(User, Product, Order)\n"
+                        "├── services/ # Stripe (payments/), "
+                        "SendGrid (email/)\n"
+                        "├── client/ # React app "
+                        "(components/, pages/, hooks/)\n"
+                        "└── shared/ # Types and constants used "
+                        "by both API and client\n"
+                        "```\n\n"
+                        "## Architecture\n"
+                        "RESTful API with JWT auth. React frontend calls API. "
+                        "Background jobs via Bull queue. PostgreSQL with "
+                        "Sequelize ORM.\n\n"
+                        "## Key Files\n"
+                        "- Entry: `src/index.js` "
+                        "(starts Express server)\n"
+                        "- Config: `src/config/` "
+                        "(env-specific settings)\n"
+                        "- Routes: `src/api/routes.js` "
+                        "(all endpoints defined here)\n"
+                        "- Auth: `src/middleware/auth.js` "
+                        "(JWT validation)\n\n"
+                        "## Conventions\n"
+                        "- Files named `[entity].service.js` "
+                        "handle business logic\n"
+                        "- All API routes return "
+                        "`{ success: boolean, data?: any, error?: string }`\n"
+                        "- Database migrations in `migrations/` - "
+                        "run before adding models\n\n"
+                        "## Important Notes\n"
+                        "- Payment webhooks MUST be idempotent "
+                        "(check `processedWebhooks` table)\n"
+                        "- User emails are case-insensitive "
+                        "(lowercase in DB)\n"
+                        "- Order status transitions enforced in "
+                        "`Order.beforeUpdate` hook\n"
+                        "````"
+                    ),
                     inputSchema={
                         "type": "object",
                         "properties": {
-                            "projectName": {"type": "string", "description": "The name of the project"},
-                            "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
-                            "overview": {"type": "string", "description": "Concise codebase overview (aim for 3500-7500 words / 5k-10k tokens)"}
+                            "projectName": {
+                                "type": "string",
+                                "description": "The name of the project",
+                            },
+                            "folderPath": {
+                                "type": "string",
+                                "description": (
+                                    "Absolute path to the project folder on disk"
+                                ),
+                            },
+                            "overview": {
+                                "type": "string",
+                                "description": (
+                                    "Concise codebase overview "
+                                    "(aim for 3500-7500 words / 5k-10k tokens)"
+                                ),
+                            },
                         },
                         "required": ["projectName", "folderPath", "overview"],
-                        "additionalProperties": False
-                    }
+                        "additionalProperties": False,
+                    },
                 ),
                 types.Tool(
                     name="get_word_frequency",
-                    description="Analyzes all file descriptions to find the most frequently used technical terms. Filters out common English stop words and symbols, returning the top 200 meaningful terms. Useful for understanding the codebase's domain vocabulary and finding all functions/files related to specific concepts.",
+                    description=(
+                        "Analyzes all file descriptions to find the most frequently "
+                        "used technical terms. Filters out common English stop words "
+                        "and symbols, returning the top 200 meaningful terms. Useful "
+                        "for understanding the codebase's domain vocabulary and "
+                        "finding all functions/files related to specific concepts."
+                    ),
                     inputSchema={
                         "type": "object",
                         "properties": {
-                            "projectName": {"type": "string", "description": "The name of the project"},
-                            "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
-                            "limit": {"type": "integer", "default": 200, "description": "Number of top terms to return"}
+                            "projectName": {
+                                "type": "string",
+                                "description": "The name of the project",
+                            },
+                            "folderPath": {
+                                "type": "string",
+                                "description": (
+                                    "Absolute path to the project folder on disk"
+                                ),
+                            },
+                            "limit": {
+                                "type": "integer",
+                                "default": 200,
+                                "description": "Number of top terms to return",
+                            },
                         },
                         "required": ["projectName", "folderPath"],
-                        "additionalProperties": False
-                    }
+                        "additionalProperties": False,
+                    },
                 ),
                 types.Tool(
                     name="check_database_health",
-                    description="Perform health diagnostics for the MCP Code Indexer's SQLite database and connection pool. Returns database resilience metrics, connection pool status, WAL mode performance, and file description storage statistics for monitoring the code indexer's database locking improvements.",
+                    description=(
+                        "Perform health diagnostics for the MCP Code Indexer's SQLite "
+                        "database and connection pool. Returns database resilience "
+                        "metrics, connection pool status, WAL mode performance, and "
+                        "file description storage statistics for monitoring the code "
+                        "indexer's database locking improvements."
+                    ),
                     inputSchema={
                         "type": "object",
                         "properties": {},
-                        "additionalProperties": False
-                    }
+                        "additionalProperties": False,
+                    },
                 ),
                 types.Tool(
                     name="search_codebase_overview",
-                    description="Search for a single word in the codebase overview and return 2 sentences before and after where the word is found. Useful for quickly finding specific information in large overviews.",
+                    description=(
+                        "Search for a single word in the codebase overview and return "
+                        "2 sentences before and after where the word is found. Useful "
+                        "for quickly finding specific information in large overviews."
+                    ),
                     inputSchema={
                         "type": "object",
                         "properties": {
-                            "projectName": {"type": "string", "description": "The name of the project"},
-                            "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
-                            "searchWord": {"type": "string", "description": "Single word to search for in the overview"}
+                            "projectName": {
+                                "type": "string",
+                                "description": "The name of the project",
+                            },
+                            "folderPath": {
+                                "type": "string",
+                                "description": (
+                                    "Absolute path to the project folder on disk"
+                                ),
+                            },
+                            "searchWord": {
+                                "type": "string",
+                                "description": (
+                                    "Single word to search for in the overview"
+                                ),
+                            },
                         },
                         "required": ["projectName", "folderPath", "searchWord"],
-                        "additionalProperties": False
-                    }
-                )
+                        "additionalProperties": False,
+                    },
+                ),
             ]
-        
+
         @self.server.call_tool()
-        async def call_tool(name: str, arguments: Dict[str, Any]) -> List[types.TextContent]:
+        async def call_tool(
+            name: str, arguments: Dict[str, Any]
+        ) -> List[types.TextContent]:
             """Handle tool calls with middleware."""
             import time
+
             start_time = time.time()
-            
+
             logger.info(f"=== MCP Tool Call: {name} ===")
             logger.info(f"Arguments: {', '.join(arguments.keys())}")
-            
+
             # Map tool names to handler methods
             tool_handlers = {
                 "get_file_description": self._handle_get_file_description,
@@ -458,67 +670,69 @@ RESTful API with JWT auth. React frontend calls API. Background jobs via Bull qu
                 "get_codebase_overview": self._handle_get_condensed_overview,
                 "update_codebase_overview": self._handle_update_codebase_overview,
                 "get_word_frequency": self._handle_get_word_frequency,
-
                 "check_database_health": self._handle_check_database_health,
                 "search_codebase_overview": self._handle_search_codebase_overview,
             }
-            
+
             if name not in tool_handlers:
                 logger.error(f"Unknown tool requested: {name}")
                 from ..error_handler import ValidationError
+
                 raise ValidationError(f"Unknown tool: {name}")
-            
+
             # Wrap handler with middleware
             wrapped_handler = self.middleware.wrap_tool_handler(name)(
                 lambda args: self._execute_tool_handler(tool_handlers[name], args)
             )
-            
+
             try:
                 result = await wrapped_handler(arguments)
-                
+
                 elapsed_time = time.time() - start_time
-                logger.info(f"MCP Tool '{name}' completed successfully in {elapsed_time:.2f}s")
-
+                logger.info(
+                    f"MCP Tool '{name}' completed successfully in {elapsed_time:.2f}s"
+                )
+
                 return result
             except Exception as e:
                 elapsed_time = time.time() - start_time
                 logger.error(f"MCP Tool '{name}' failed after {elapsed_time:.2f}s: {e}")
                 logger.error(f"Exception details: {type(e).__name__}: {str(e)}")
                 raise
-        
-    async def _execute_tool_handler(self, handler, arguments: Dict[str, Any]) -> List[types.TextContent]:
+
+    async def _execute_tool_handler(
+        self, handler, arguments: Dict[str, Any]
+    ) -> List[types.TextContent]:
         """Execute a tool handler and format the result."""
         # Clean HTML entities from all arguments before processing
         cleaned_arguments = self._clean_arguments(arguments)
-        
+
         result = await handler(cleaned_arguments)
-
-        return [types.TextContent(
-            type="text",
-            text=json.dumps(result, indent=2, default=str)
-        )]
-
+
+        return [
+            types.TextContent(
+                type="text", text=json.dumps(result, indent=2, default=str)
+            )
+        ]
+
     async def _get_or_create_project_id(self, arguments: Dict[str, Any]) -> str:
         """
         Get or create a project ID using intelligent matching.
-        
+
         Matches projects based on identification factors:
         1. Project name (normalized, case-insensitive)
         2. Folder path in aliases
-        
+
         Projects are now identified primarily by name without git coupling.
         """
         project_name = arguments["projectName"]
         folder_path = arguments["folderPath"]
 
-
         # Normalize project name for case-insensitive matching
         normalized_name = project_name.lower()
-        
+
         # Find potential project matches
-        project = await self._find_matching_project(
-            normalized_name, folder_path
-        )
+        project = await self._find_matching_project(normalized_name, folder_path)
         if project:
             # Update project metadata and aliases
             await self._update_existing_project(project, normalized_name, folder_path)
@@ -530,63 +744,72 @@ RESTful API with JWT auth. React frontend calls API. Background jobs via Bull qu
                 name=normalized_name,
                 aliases=[folder_path],
                 created=datetime.utcnow(),
-                last_accessed=datetime.utcnow()
+                last_accessed=datetime.utcnow(),
             )
             await self.db_manager.create_project(project)
             logger.info(f"Created new project: {normalized_name} ({project_id})")
-            
+
             return project.id
-        
+
     async def _find_matching_project(
-        self,
-        normalized_name: str,
-        folder_path: str
+        self, normalized_name: str, folder_path: str
     ) -> Optional[Project]:
         """
         Find a matching project using name and folder path matching.
-        
+
         Returns the best matching project or None if no sufficient match is found.
         """
         all_projects = await self.db_manager.get_all_projects()
-        
+
         best_match = None
         best_score = 0
-        
+
         for project in all_projects:
             score = 0
             match_factors = []
-            
+
             # Factor 1: Project name match (primary identifier)
             if project.name.lower() == normalized_name:
                 score += 2  # Higher weight for name match
                 match_factors.append("name")
-            
+
             # Factor 2: Folder path in aliases
-            project_aliases = json.loads(project.aliases) if isinstance(project.aliases, str) else project.aliases
+            project_aliases = (
+                json.loads(project.aliases)
+                if isinstance(project.aliases, str)
+                else project.aliases
+            )
             if folder_path in project_aliases:
                 score += 1
                 match_factors.append("folder_path")
-            
+
             # If we have a name match, it's a strong candidate
             if score >= 2:
                 if score > best_score:
                     best_score = score
                     best_match = project
-                    logger.info(f"Match for project {project.name} (score: {score}, factors: {match_factors})")
-
+                    logger.info(
+                        f"Match for project {project.name} "
+                        f"(score: {score}, factors: {match_factors})"
+                    )
+
             # If only name matches, check file similarity for potential matches
             elif score == 1 and "name" in match_factors:
                 if await self._check_file_similarity(project, folder_path):
-                    logger.info(f"File similarity match for project {project.name} (factor: {match_factors[0]})")
+                    logger.info(
+                        f"File similarity match for project {project.name} "
+                        f"(factor: {match_factors[0]})"
+                    )
                     if score > best_score:
                         best_score = score
                         best_match = project
-        
+
         return best_match
-        
+
     async def _check_file_similarity(self, project: Project, folder_path: str) -> bool:
         """
-        Check if the files in the folder are similar to files already indexed for this project.
+        Check if the files in the folder are similar to files already indexed
+        for this project.
         Returns True if 80%+ of files match.
         """
         try:
@@ -594,163 +817,182 @@ RESTful API with JWT auth. React frontend calls API. Background jobs via Bull qu
             scanner = FileScanner(Path(folder_path))
             if not scanner.is_valid_project_directory():
                 return False
-            
+
             current_files = scanner.scan_directory()
             current_basenames = {f.name for f in current_files}
-            
+
             if not current_basenames:
                 return False
-            
+
             # Get files already indexed for this project
             indexed_files = await self.db_manager.get_all_file_descriptions(project.id)
             indexed_basenames = {Path(fd.file_path).name for fd in indexed_files}
-            
+
             if not indexed_basenames:
                 return False
-            
+
             # Calculate similarity
             intersection = current_basenames & indexed_basenames
             similarity = len(intersection) / len(current_basenames)
-
-            logger.debug(f"File similarity for {project.name}: {similarity:.2%} ({len(intersection)}/{len(current_basenames)} files match)")
-
+
+            logger.debug(
+                f"File similarity for {project.name}: {similarity:.2%} "
+                f"({len(intersection)}/{len(current_basenames)} files match)"
+            )
+
             return similarity >= 0.8
         except Exception as e:
             logger.warning(f"Error checking file similarity: {e}")
             return False
-        
+
     async def _update_existing_project(
-        self,
-        project: Project,
-        normalized_name: str,
-        folder_path: str
+        self, project: Project, normalized_name: str, folder_path: str
     ) -> None:
         """Update an existing project with new metadata and folder alias."""
         # Update last accessed time
         await self.db_manager.update_project_access_time(project.id)
-        
+
         should_update = False
-        
+
         # Update name if different
         if project.name != normalized_name:
             project.name = normalized_name
             should_update = True
-        
+
         # Add folder path to aliases if not already present
-        project_aliases = json.loads(project.aliases) if isinstance(project.aliases, str) else project.aliases
+        project_aliases = (
+            json.loads(project.aliases)
+            if isinstance(project.aliases, str)
+            else project.aliases
+        )
         if folder_path not in project_aliases:
             project_aliases.append(folder_path)
             project.aliases = project_aliases
             should_update = True
-            logger.info(f"Added new folder alias to project {project.name}: {folder_path}")
-
+            logger.info(
+                f"Added new folder alias to project {project.name}: {folder_path}"
+            )
+
         if should_update:
             await self.db_manager.update_project(project)
             logger.debug(f"Updated project metadata for {project.name}")
-
 
-    async def _handle_get_file_description(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+    async def _handle_get_file_description(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """Handle get_file_description tool calls."""
         project_id = await self._get_or_create_project_id(arguments)
-        
+
         file_desc = await self.db_manager.get_file_description(
-            project_id=project_id,
-            file_path=arguments["filePath"]
+            project_id=project_id, file_path=arguments["filePath"]
         )
-        
+
         if file_desc:
             return {
                 "exists": True,
                 "description": file_desc.description,
                 "lastModified": file_desc.last_modified.isoformat(),
                 "fileHash": file_desc.file_hash,
-                "version": file_desc.version
+                "version": file_desc.version,
             }
         else:
             return {
                 "exists": False,
-                "message": f"No description found for {arguments['filePath']}"
+                "message": f"No description found for {arguments['filePath']}",
             }
-
-    async def _handle_update_file_description(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+
+    async def _handle_update_file_description(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """Handle update_file_description tool calls."""
         logger.info(f"Updating file description for: {arguments['filePath']}")
         logger.info(f"Project: {arguments.get('projectName', 'Unknown')}")
-        
+
         description_length = len(arguments.get("description", ""))
         logger.info(f"Description length: {description_length} characters")
-        
+
         project_id = await self._get_or_create_project_id(arguments)
-        
+
         logger.info(f"Resolved project_id: {project_id}")
-        
+
         file_desc = FileDescription(
             project_id=project_id,
             file_path=arguments["filePath"],
             description=arguments["description"],
             file_hash=arguments.get("fileHash"),
             last_modified=datetime.utcnow(),
-            version=1
+            version=1,
         )
-        
+
         await self.db_manager.create_file_description(file_desc)
-        
+
         logger.info(f"Successfully updated description for: {arguments['filePath']}")
-        
+
         return {
             "success": True,
             "message": f"Description updated for {arguments['filePath']}",
             "filePath": arguments["filePath"],
-            "lastModified": file_desc.last_modified.isoformat()
+            "lastModified": file_desc.last_modified.isoformat(),
         }
-
-    async def _handle_check_codebase_size(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+
+    async def _handle_check_codebase_size(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """Handle check_codebase_size tool calls."""
-        logger.info(f"Checking codebase size for: {arguments.get('projectName', 'Unknown')}")
+        logger.info(
+            f"Checking codebase size for: {arguments.get('projectName', 'Unknown')}"
+        )
         logger.info(f"Folder path: {arguments.get('folderPath', 'Unknown')}")
-        
+
         project_id = await self._get_or_create_project_id(arguments)
         folder_path = Path(arguments["folderPath"])
-        
+
         logger.info(f"Resolved project_id: {project_id}")
-        
+
         # Clean up descriptions for files that no longer exist
         logger.info("Cleaning up descriptions for missing files...")
         cleaned_up_files = await self.db_manager.cleanup_missing_files(
-            project_id=project_id,
-            project_root=folder_path
+            project_id=project_id, project_root=folder_path
         )
         logger.info(f"Cleaned up {len(cleaned_up_files)} missing files")
-        
+
         # Get file descriptions for this project (after cleanup)
         logger.info("Retrieving file descriptions...")
         file_descriptions = await self.db_manager.get_all_file_descriptions(
             project_id=project_id
         )
         logger.info(f"Found {len(file_descriptions)} file descriptions")
-        
+
         # Use provided token limit or fall back to server default
         token_limit = arguments.get("tokenLimit", self.token_limit)
-        
+
         # Calculate total tokens for descriptions
         logger.info("Calculating total token count...")
-        descriptions_tokens = self.token_counter.calculate_codebase_tokens(file_descriptions)
-
+        descriptions_tokens = self.token_counter.calculate_codebase_tokens(
+            file_descriptions
+        )
+
         # Get overview tokens if available
         overview = await self.db_manager.get_project_overview(project_id)
         overview_tokens = 0
         if overview and overview.overview:
             overview_tokens = self.token_counter.count_tokens(overview.overview)
-        
+
         total_tokens = descriptions_tokens + overview_tokens
         is_large = total_tokens > token_limit
         recommendation = "use_search" if is_large else "use_overview"
-
-        logger.info(f"Codebase analysis complete: {total_tokens} tokens total ({descriptions_tokens} descriptions + {overview_tokens} overview), {len(file_descriptions)} files")
-        logger.info(f"Size assessment: {'LARGE' if is_large else 'SMALL'} (limit: {token_limit})")
+
+        logger.info(
+            f"Codebase analysis complete: {total_tokens} tokens total "
+            f"({descriptions_tokens} descriptions + {overview_tokens} overview), "
+            f"{len(file_descriptions)} files"
+        )
+        logger.info(
+            f"Size assessment: {'LARGE' if is_large else 'SMALL'} "
+            f"(limit: {token_limit})"
+        )
         logger.info(f"Recommendation: {recommendation}")
-        
+
         return {
             "totalTokens": total_tokens,
             "descriptionsTokens": descriptions_tokens,
@@ -760,19 +1002,24 @@ RESTful API with JWT auth. React frontend calls API. Background jobs via Bull qu
             "tokenLimit": token_limit,
             "totalFiles": len(file_descriptions),
             "cleanedUpFiles": cleaned_up_files,
-            "cleanedUpCount": len(cleaned_up_files)
+            "cleanedUpCount": len(cleaned_up_files),
         }
-
-    async def _handle_find_missing_descriptions(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+
+    async def _handle_find_missing_descriptions(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """Handle find_missing_descriptions tool calls."""
-        logger.info(f"Finding missing descriptions for: {arguments.get('projectName', 'Unknown')}")
+        logger.info(
+            f"Finding missing descriptions for: "
+            f"{arguments.get('projectName', 'Unknown')}"
+        )
         logger.info(f"Folder path: {arguments.get('folderPath', 'Unknown')}")
-        
+
         project_id = await self._get_or_create_project_id(arguments)
         folder_path = Path(arguments["folderPath"])
-        
+
         logger.info(f"Resolved project_id: {project_id}")
-        
+
         # Get existing file descriptions
         logger.info("Retrieving existing file descriptions...")
         existing_descriptions = await self.db_manager.get_all_file_descriptions(
@@ -780,7 +1027,7 @@ RESTful API with JWT auth. React frontend calls API. Background jobs via Bull qu
         )
         existing_paths = {desc.file_path for desc in existing_descriptions}
         logger.info(f"Found {len(existing_paths)} existing descriptions")
-        
+
         # Scan directory for files
         logger.info(f"Scanning project directory: {folder_path}")
         scanner = FileScanner(folder_path)
@@ -789,110 +1036,116 @@ RESTful API with JWT auth. React frontend calls API. Background jobs via Bull qu
             return {
                 "error": f"Invalid or inaccessible project directory: {folder_path}"
             }
-        
+
         missing_files = scanner.find_missing_files(existing_paths)
         missing_paths = [scanner.get_relative_path(f) for f in missing_files]
-        
+
         logger.info(f"Found {len(missing_paths)} files without descriptions")
-        
+
         # Apply limit if specified
         limit = arguments.get("limit")
         total_missing = len(missing_paths)
         if limit is not None and isinstance(limit, int) and limit > 0:
             missing_paths = missing_paths[:limit]
             logger.info(f"Applied limit {limit}, returning {len(missing_paths)} files")
-        
+
         # Get project stats
         stats = scanner.get_project_stats()
         logger.info(f"Project stats: {stats.get('total_files', 0)} total files")
-        
+
         return {
             "missingFiles": missing_paths,
             "totalMissing": total_missing,
             "returnedCount": len(missing_paths),
             "existingDescriptions": len(existing_paths),
-            "projectStats": stats
+            "projectStats": stats,
         }
-
-    async def _handle_search_descriptions(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+
+    async def _handle_search_descriptions(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """Handle search_descriptions tool calls."""
         project_id = await self._get_or_create_project_id(arguments)
         max_results = arguments.get("maxResults", 20)
-        
+
         # Perform search
         search_results = await self.db_manager.search_file_descriptions(
-            project_id=project_id,
-            query=arguments["query"],
-            max_results=max_results
+            project_id=project_id, query=arguments["query"], max_results=max_results
         )
-        
+
         # Format results
         formatted_results = []
         for result in search_results:
-            formatted_results.append({
-                "filePath": result.file_path,
-                "description": result.description,
-                "relevanceScore": result.relevance_score
-            })
-
+            formatted_results.append(
+                {
+                    "filePath": result.file_path,
+                    "description": result.description,
+                    "relevanceScore": result.relevance_score,
+                }
+            )
+
         return {
             "results": formatted_results,
             "totalResults": len(formatted_results),
             "query": arguments["query"],
-            "maxResults": max_results
+            "maxResults": max_results,
         }
-
-    async def _handle_get_codebase_overview(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+
+    async def _handle_get_codebase_overview(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """Handle get_codebase_overview tool calls."""
         project_id = await self._get_or_create_project_id(arguments)
-        
+
         # Get all file descriptions
         file_descriptions = await self.db_manager.get_all_file_descriptions(
             project_id=project_id
         )
-        
+
         # Calculate total tokens
         total_tokens = self.token_counter.calculate_codebase_tokens(file_descriptions)
         is_large = self.token_counter.is_large_codebase(total_tokens)
-
-        # Always build and return the folder structure - if the AI called this tool, it wants the overview
+
+        # Always build and return the folder structure - if the AI called this
+        # tool, it wants the overview
         structure = self._build_folder_structure(file_descriptions)
-        
+
         return {
             "projectName": arguments["projectName"],
             "totalFiles": len(file_descriptions),
             "totalTokens": total_tokens,
             "isLarge": is_large,
             "tokenLimit": self.token_counter.token_limit,
-            "structure": structure
+            "structure": structure,
         }
-
-    def _build_folder_structure(self, file_descriptions: List[FileDescription]) -> Dict[str, Any]:
+
+    def _build_folder_structure(
+        self, file_descriptions: List[FileDescription]
+    ) -> Dict[str, Any]:
         """Build hierarchical folder structure from file descriptions."""
         root = {"path": "", "files": [], "folders": {}}
-        
+
         for file_desc in file_descriptions:
             path_parts = Path(file_desc.file_path).parts
             current = root
-            
+
             # Navigate/create folder structure
             for i, part in enumerate(path_parts[:-1]):
-                folder_path = "/".join(path_parts[:i+1])
+                folder_path = "/".join(path_parts[: i + 1])
                 if part not in current["folders"]:
                     current["folders"][part] = {
                         "path": folder_path,
                         "files": [],
-                        "folders": {}
+                        "folders": {},
                     }
                 current = current["folders"][part]
-            
+
             # Add file to current folder
             if path_parts:  # Handle empty paths
-                current["files"].append({
-                    "path": file_desc.file_path,
-                    "description": file_desc.description
-                })
-
+                current["files"].append(
+                    {"path": file_desc.file_path, "description": file_desc.description}
+                )
+
         # Convert nested dict structure to list format, skipping empty folders
         def convert_structure(node):
             folders = []
@@ -901,108 +1154,112 @@ RESTful API with JWT auth. React frontend calls API. Background jobs via Bull qu
             # Only include folders that have files or non-empty subfolders
             if converted_folder["files"] or converted_folder["folders"]:
                 folders.append(converted_folder)
-
-            return {
-                "path": node["path"],
-                "files": node["files"],
-                "folders": folders
-            }
-
+
+            return {"path": node["path"], "files": node["files"], "folders": folders}
+
         return convert_structure(root)
-
 
-    async def _handle_get_condensed_overview(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+    async def _handle_get_condensed_overview(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """Handle get_codebase_overview tool calls for condensed overviews."""
         project_id = await self._get_or_create_project_id(arguments)
-        
+
         # Try to get existing overview
         overview = await self.db_manager.get_project_overview(project_id)
-        
+
         if overview:
             return {
                 "overview": overview.overview,
                 "lastModified": overview.last_modified.isoformat(),
                 "totalFiles": overview.total_files,
-                "totalTokensInFullDescriptions": overview.total_tokens
+                "totalTokensInFullDescriptions": overview.total_tokens,
             }
         else:
             return {
                 "overview": "",
                 "lastModified": "",
                 "totalFiles": 0,
-                "totalTokensInFullDescriptions": 0
+                "totalTokensInFullDescriptions": 0,
             }
-
-    async def _handle_update_codebase_overview(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+
+    async def _handle_update_codebase_overview(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """Handle update_codebase_overview tool calls."""
         project_id = await self._get_or_create_project_id(arguments)
-
-
+
         # Get current file count and total tokens for context
         file_descriptions = await self.db_manager.get_all_file_descriptions(
             project_id=project_id
         )
-        
+
         total_files = len(file_descriptions)
         total_tokens = self.token_counter.calculate_codebase_tokens(file_descriptions)
-        
+
         # Create overview record
         overview = ProjectOverview(
             project_id=project_id,
             overview=arguments["overview"],
             last_modified=datetime.utcnow(),
             total_files=total_files,
-            total_tokens=total_tokens
+            total_tokens=total_tokens,
         )
-        
+
         await self.db_manager.create_project_overview(overview)
-        
+
         return {
             "success": True,
             "message": f"Overview updated for {total_files} files",
             "totalFiles": total_files,
             "totalTokens": total_tokens,
-            "overviewLength": len(arguments["overview"])
+            "overviewLength": len(arguments["overview"]),
         }
-
-    async def _handle_get_word_frequency(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+
+    async def _handle_get_word_frequency(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """Handle get_word_frequency tool calls."""
         project_id = await self._get_or_create_project_id(arguments)
         limit = arguments.get("limit", 200)
-        
+
         # Analyze word frequency
         result = await self.db_manager.analyze_word_frequency(
-            project_id=project_id,
-            limit=limit
+            project_id=project_id, limit=limit
        )
-        
+
         return {
-            "topTerms": [{"term": term.term, "frequency": term.frequency} for term in result.top_terms],
+            "topTerms": [
+                {"term": term.term, "frequency": term.frequency}
+                for term in result.top_terms
+            ],
             "totalTermsAnalyzed": result.total_terms_analyzed,
-            "totalUniqueTerms": result.total_unique_terms
+            "totalUniqueTerms": result.total_unique_terms,
         }
-
-    async def _handle_search_codebase_overview(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+
+    async def _handle_search_codebase_overview(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """Handle search_codebase_overview tool calls."""
         project_id = await self._get_or_create_project_id(arguments)
         search_word = arguments["searchWord"].lower()
-        
+
         # Get the overview
         overview = await self.db_manager.get_project_overview(project_id)
-        
+
         if not overview or not overview.overview:
             return {
                 "found": False,
                 "message": "No overview found for this project",
-                "searchWord": arguments["searchWord"]
+                "searchWord": arguments["searchWord"],
             }
-        
+
         # Split overview into sentences
         import re
-        sentences = re.split(r'[.!?]+', overview.overview)
+
+        sentences = re.split(r"[.!?]+", overview.overview)
         sentences = [s.strip() for s in sentences if s.strip()]
-        
+
         # Find matches
         matches = []
         for i, sentence in enumerate(sentences):
@@ -1010,170 +1267,209 @@ RESTful API with JWT auth. React frontend calls API. Background jobs via Bull qu
|
|
1010
1267
|
# Get context: 2 sentences before and after
|
1011
1268
|
start_idx = max(0, i - 2)
|
1012
1269
|
end_idx = min(len(sentences), i + 3)
|
1013
|
-
|
1270
|
+
|
1014
1271
|
context_sentences = sentences[start_idx:end_idx]
|
1015
|
-
context =
|
1016
|
-
|
1017
|
-
matches.append(
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1024
|
-
|
1272
|
+
context = ". ".join(context_sentences) + "."
|
1273
|
+
|
1274
|
+
matches.append(
|
1275
|
+
{
|
1276
|
+
"matchIndex": i,
|
1277
|
+
"matchSentence": sentence,
|
1278
|
+
"context": context,
|
1279
|
+
"contextStartIndex": start_idx,
|
1280
|
+
"contextEndIndex": end_idx - 1,
|
1281
|
+
}
|
1282
|
+
)
|
1283
|
+
|
1025
1284
|
return {
|
1026
1285
|
"found": len(matches) > 0,
|
1027
1286
|
"searchWord": arguments["searchWord"],
|
1028
1287
|
"matches": matches,
|
1029
1288
|
"totalMatches": len(matches),
|
1030
|
-
"totalSentences": len(sentences)
|
1289
|
+
"totalSentences": len(sentences),
|
1031
1290
|
}
|
 
-    async def _handle_check_database_health(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+    async def _handle_check_database_health(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """
         Handle check_database_health tool calls with comprehensive diagnostics.
-
+
         Returns detailed database health information including retry statistics,
         performance analysis, and resilience indicators.
         """
         # Get comprehensive health diagnostics from the enhanced monitor
-        if hasattr(self.db_manager, "_health_monitor") and self.db_manager._health_monitor:
-            comprehensive_diagnostics = self.db_manager._health_monitor.get_comprehensive_diagnostics()
+        if (
+            hasattr(self.db_manager, "_health_monitor")
+            and self.db_manager._health_monitor
+        ):
+            comprehensive_diagnostics = (
+                self.db_manager._health_monitor.get_comprehensive_diagnostics()
+            )
         else:
             # Fallback to basic health check if monitor not available
             health_check = await self.db_manager.check_health()
             comprehensive_diagnostics = {
                 "basic_health_check": health_check,
-                "note": "Enhanced health monitoring not available"
+                "note": "Enhanced health monitoring not available",
             }
-
+
         # Get additional database-level statistics
         database_stats = self.db_manager.get_database_stats()
-
+
         return {
             "comprehensive_diagnostics": comprehensive_diagnostics,
             "database_statistics": database_stats,
             "configuration": {
                 **self.db_config,
                 "retry_executor_config": (
-                    self.db_manager._retry_executor.config.__dict__
-                    if hasattr(self.db_manager, "_retry_executor") and self.db_manager._retry_executor
+                    self.db_manager._retry_executor.config.__dict__
+                    if hasattr(self.db_manager, "_retry_executor")
+                    and self.db_manager._retry_executor
                     else {}
-                )
+                ),
             },
             "server_info": {
                 "token_limit": self.token_limit,
                 "db_path": str(self.db_path),
                 "cache_dir": str(self.cache_dir),
                 "health_monitoring_enabled": (
-                    hasattr(self.db_manager, "_health_monitor") and
-                    self.db_manager._health_monitor is not None
-                )
+                    hasattr(self.db_manager, "_health_monitor")
+                    and self.db_manager._health_monitor is not None
+                ),
             },
             "timestamp": datetime.utcnow().isoformat(),
-            "status_summary": self._generate_health_summary(comprehensive_diagnostics)
+            "status_summary": self._generate_health_summary(comprehensive_diagnostics),
         }
-
+
     def _generate_health_summary(self, diagnostics: Dict[str, Any]) -> Dict[str, Any]:
         """Generate a concise health summary from comprehensive diagnostics."""
         if "resilience_indicators" not in diagnostics:
             return {"status": "limited_diagnostics_available"}
-
+
         resilience = diagnostics["resilience_indicators"]
         performance = diagnostics.get("performance_analysis", {})
-
+
         # Overall status based on health score
         health_score = resilience.get("overall_health_score", 0)
         if health_score >= 90:
             status = "excellent"
         elif health_score >= 75:
-            status = "good"
+            status = "good"
         elif health_score >= 50:
             status = "fair"
         else:
             status = "poor"
-
+
         return {
             "overall_status": status,
             "health_score": health_score,
-            "retry_effectiveness": resilience.get("retry_effectiveness", {}).get("is_effective", False),
-            "connection_stability": resilience.get("connection_stability", {}).get("is_stable", False),
-            "key_recommendations": resilience.get("recommendations", [])[:3],  # Top 3 recommendations
-            "performance_trend": performance.get("health_check_performance", {}).get("recent_performance_trend", "unknown"),
+            "retry_effectiveness": resilience.get("retry_effectiveness", {}).get(
+                "is_effective", False
+            ),
+            "connection_stability": resilience.get("connection_stability", {}).get(
+                "is_stable", False
+            ),
+            "key_recommendations": resilience.get("recommendations", [])[
+                :3
+            ],  # Top 3 recommendations
+            "performance_trend": performance.get("health_check_performance", {}).get(
+                "recent_performance_trend", "unknown"
+            ),
         }
-
-    async def _run_session_with_retry(self, read_stream, write_stream, initialization_options) -> None:
+
+    async def _run_session_with_retry(
+        self, read_stream, write_stream, initialization_options
+    ) -> None:
         """Run a single MCP session with error handling and retry logic."""
         max_retries = 3
         base_delay = 1.0  # seconds
-
+
         for attempt in range(max_retries + 1):
             try:
-                logger.info(f"Starting MCP server protocol session (attempt {attempt + 1})...")
-                await self.server.run(
-                    read_stream,
-                    write_stream,
-                    initialization_options
+                logger.info(
+                    f"Starting MCP server protocol session (attempt {attempt + 1})..."
                 )
+                await self.server.run(read_stream, write_stream, initialization_options)
                 logger.info("MCP server session completed normally")
                 return  # Success, exit retry loop
-
+
             except ValidationError as e:
                 # Handle malformed requests gracefully
-                logger.warning(f"Received malformed request (attempt {attempt + 1}): {e}", extra={
-                    "structured_data": {
-                        "error_type": "ValidationError",
-                        "validation_errors": e.errors() if hasattr(e, "errors") else str(e),
-                        "attempt": attempt + 1,
-                        "max_retries": max_retries,
-                    }
-                })
-
+                logger.warning(
+                    f"Received malformed request (attempt {attempt + 1}): {e}",
+                    extra={
+                        "structured_data": {
+                            "error_type": "ValidationError",
+                            "validation_errors": (
+                                e.errors() if hasattr(e, "errors") else str(e)
+                            ),
+                            "attempt": attempt + 1,
+                            "max_retries": max_retries,
+                        }
+                    },
+                )
+
                 if attempt < max_retries:
-                    delay = base_delay * (2 ** attempt)  # Exponential backoff
+                    delay = base_delay * (2**attempt)  # Exponential backoff
                     logger.info(f"Retrying in {delay} seconds...")
                     await asyncio.sleep(delay)
                 else:
-                    logger.error("Max retries exceeded for validation errors. Server will continue but this session failed.")
+                    logger.error(
+                        "Max retries exceeded for validation errors. Server will "
+                        "continue but this session failed."
+                    )
                     return
-
+
             except (ConnectionError, BrokenPipeError, EOFError) as e:
                 # Handle client disconnection gracefully
                 logger.info(f"Client disconnected: {e}")
                 return
-
+
             except Exception as e:
                 # Handle other exceptions with full logging
                 import traceback
-                if "unhandled errors in a TaskGroup" in str(e) and "ValidationError" in str(e):
+
+                if "unhandled errors in a TaskGroup" in str(
+                    e
+                ) and "ValidationError" in str(e):
                     # This is likely a ValidationError wrapped in a TaskGroup exception
-                    logger.warning(f"Detected wrapped validation error (attempt {attempt + 1}): {e}", extra={
-                        "structured_data": {
-                            "error_type": type(e).__name__,
-                            "error_message": str(e),
-                            "attempt": attempt + 1,
-                            "max_retries": max_retries,
-                            "likely_validation_error": True,
-                        }
-                    })
-
+                    logger.warning(
+                        f"Detected wrapped validation error "
+                        f"(attempt {attempt + 1}): {e}",
+                        extra={
+                            "structured_data": {
+                                "error_type": type(e).__name__,
+                                "error_message": str(e),
+                                "attempt": attempt + 1,
+                                "max_retries": max_retries,
+                                "likely_validation_error": True,
+                            }
+                        },
+                    )
+
                     if attempt < max_retries:
-                        delay = base_delay * (2 ** attempt)
+                        delay = base_delay * (2**attempt)
                         logger.info(f"Retrying in {delay} seconds...")
                         await asyncio.sleep(delay)
                     else:
-                        logger.error("Max retries exceeded for validation errors. Server will continue but this session failed.")
+                        logger.error(
+                            "Max retries exceeded for validation errors. Server will "
+                            "continue but this session failed."
+                        )
                         return
                 else:
                     # This is a genuine error, log and re-raise
-                    logger.error(f"MCP server session error: {e}", extra={
-                        "structured_data": {
-                            "error_type": type(e).__name__,
-                            "error_message": str(e),
-                            "traceback": traceback.format_exc(),
-                        }
-                    })
+                    logger.error(
+                        f"MCP server session error: {e}",
+                        extra={
+                            "structured_data": {
+                                "error_type": type(e).__name__,
+                                "error_message": str(e),
+                                "traceback": traceback.format_exc(),
+                            }
+                        },
+                    )
                     raise
 
     async def run(self) -> None:
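The session wrapper reformatted above retries on ValidationError with exponential backoff: base_delay * 2**attempt, i.e. 1s, 2s, 4s for base_delay = 1.0. A compact async sketch of that policy — the flaky session coroutine and ValueError stand-in are hypothetical:

    import asyncio

    async def run_with_backoff(session, max_retries: int = 3, base_delay: float = 1.0):
        # Retry the session, doubling the delay after each failed attempt.
        for attempt in range(max_retries + 1):
            try:
                return await session()
            except ValueError:  # stand-in for ValidationError
                if attempt == max_retries:
                    raise
                await asyncio.sleep(base_delay * (2**attempt))  # 1s, 2s, 4s, ...

    # asyncio.run(run_with_backoff(some_flaky_session))  # hypothetical caller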
@@ -1181,80 +1477,99 @@ RESTful API with JWT auth. React frontend calls API. Background jobs via Bull qu
         logger.info("Starting server initialization...")
         await self.initialize()
         logger.info("Server initialization completed, starting MCP protocol...")
-
+
         max_retries = 5
         base_delay = 2.0  # seconds
-
+
         for attempt in range(max_retries + 1):
             try:
                 async with stdio_server() as (read_stream, write_stream):
-                    logger.info(f"stdio_server context established (attempt {attempt + 1})")
+                    logger.info(
+                        f"stdio_server context established (attempt {attempt + 1})"
+                    )
                     initialization_options = self.server.create_initialization_options()
                     logger.debug(f"Initialization options: {initialization_options}")
-
-                    await self._run_session_with_retry(read_stream, write_stream, initialization_options)
+
+                    await self._run_session_with_retry(
+                        read_stream, write_stream, initialization_options
+                    )
                     return  # Success, exit retry loop
-
+
             except KeyboardInterrupt:
                 logger.info("Server stopped by user interrupt")
                 return
-
+
             except Exception as e:
                 import traceback
-
+
                 # Check if this is a wrapped validation error
                 error_str = str(e)
                 is_validation_error = (
-                    "ValidationError" in error_str or
-                    "Field required" in error_str or
-                    "Input should be" in error_str or
-                    "pydantic_core._pydantic_core.ValidationError" in error_str
+                    "ValidationError" in error_str
+                    or "Field required" in error_str
+                    or "Input should be" in error_str
+                    or "pydantic_core._pydantic_core.ValidationError" in error_str
                 )
-
+
                 if is_validation_error:
-                    logger.warning(f"Detected validation error in session (attempt {attempt + 1}): Malformed client request", extra={
-                        "structured_data": {
-                            "error_type": "ValidationError",
-                            "error_message": "Client sent malformed request (likely missing clientInfo)",
-                            "attempt": attempt + 1,
-                            "max_retries": max_retries,
-                            "will_retry": attempt < max_retries,
-                        }
-                    })
-
+                    logger.warning(
+                        f"Detected validation error in session "
+                        f"(attempt {attempt + 1}): Malformed client request",
+                        extra={
+                            "structured_data": {
+                                "error_type": "ValidationError",
+                                "error_message": (
+                                    "Client sent malformed request "
+                                    "(likely missing clientInfo)"
+                                ),
+                                "attempt": attempt + 1,
+                                "max_retries": max_retries,
+                                "will_retry": attempt < max_retries,
+                            }
+                        },
+                    )
+
                     if attempt < max_retries:
-                        delay = base_delay * (2 ** min(attempt, 3))  # Cap exponential growth
+                        delay = base_delay * (
+                            2 ** min(attempt, 3)
+                        )  # Cap exponential growth
                         logger.info(f"Retrying server in {delay} seconds...")
                         await asyncio.sleep(delay)
                         continue
                     else:
-                        logger.warning("Max retries exceeded for validation errors. Server is robust against malformed requests.")
+                        logger.warning(
+                            "Max retries exceeded for validation errors. Server is "
+                            "robust against malformed requests."
+                        )
                         return
                 else:
                     # This is a genuine fatal error
-                    logger.error(f"Fatal server error: {e}", extra={
-                        "structured_data": {
-                            "error_type": type(e).__name__,
-                            "error_message": str(e),
-                            "traceback": traceback.format_exc(),
-                        }
-                    })
+                    logger.error(
+                        f"Fatal server error: {e}",
+                        extra={
+                            "structured_data": {
+                                "error_type": type(e).__name__,
+                                "error_message": str(e),
+                                "traceback": traceback.format_exc(),
+                            }
+                        },
+                    )
                     raise
-
+
         # Clean shutdown
         await self.shutdown()
-
+
     async def shutdown(self) -> None:
         """Clean shutdown of server resources."""
         try:
             # Cancel any running tasks
             self.task_manager.cancel_all()
-
+
             # Close database connections
             await self.db_manager.close_pool()
-
+
             self.logger.info("Server shutdown completed successfully")
-
+
         except Exception as e:
             self.error_handler.log_error(e, context={"phase": "shutdown"})
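Two details in run() deserve a note. The retry delay caps its exponent — 2 ** min(attempt, 3) — so with base_delay = 2.0 the schedule is 2, 4, 8, 16, 16, ... seconds rather than doubling forever; and wrapped pydantic failures are detected by substring matching on the exception text, since, per the code's own comment, they can arrive wrapped in a TaskGroup exception. Both checks, sketched as pure functions:

    def capped_delay(attempt: int, base_delay: float = 2.0, cap: int = 3) -> float:
        # Exponent stops growing once attempt reaches the cap.
        return base_delay * (2 ** min(attempt, cap))

    def looks_like_validation_error(exc: Exception) -> bool:
        # Same heuristic markers the server greps for in stringified errors.
        text = str(exc)
        return any(
            marker in text
            for marker in ("ValidationError", "Field required", "Input should be")
        )

    assert [capped_delay(a) for a in range(5)] == [2.0, 4.0, 8.0, 16.0, 16.0]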
@@ -1262,14 +1577,14 @@ RESTful API with JWT auth. React frontend calls API. Background jobs via Bull qu
 async def main():
     """Main entry point for the MCP server."""
     import sys
-
+
     # Setup logging to stderr (stdout is used for MCP communication)
     logging.basicConfig(
         level=logging.INFO,
         format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-        handlers=[logging.StreamHandler(sys.stderr)]
+        handlers=[logging.StreamHandler(sys.stderr)],
     )
-
+
     # Create and run server
     server = MCPCodeIndexServer()
     await server.run()
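The only change above is a trailing comma, but the line it touches encodes an important constraint: log handlers must write to stderr because stdout carries the MCP stdio transport. A minimal standalone reproduction — the __main__ guard is assumed here and is not part of this hunk:

    import asyncio
    import logging
    import sys

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=[logging.StreamHandler(sys.stderr)],  # keep stdout free for MCP frames
    )

    async def main() -> None:
        logging.info("logs go to stderr; stdout stays clean")

    if __name__ == "__main__":
        asyncio.run(main())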