mcp-code-indexer 3.1.3__py3-none-any.whl → 3.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_code_indexer/__init__.py +8 -6
- mcp_code_indexer/ask_handler.py +105 -75
- mcp_code_indexer/claude_api_handler.py +125 -82
- mcp_code_indexer/cleanup_manager.py +107 -81
- mcp_code_indexer/database/connection_health.py +212 -161
- mcp_code_indexer/database/database.py +529 -415
- mcp_code_indexer/database/exceptions.py +167 -118
- mcp_code_indexer/database/models.py +54 -19
- mcp_code_indexer/database/retry_executor.py +139 -103
- mcp_code_indexer/deepask_handler.py +178 -140
- mcp_code_indexer/error_handler.py +88 -76
- mcp_code_indexer/file_scanner.py +163 -141
- mcp_code_indexer/git_hook_handler.py +352 -261
- mcp_code_indexer/logging_config.py +76 -94
- mcp_code_indexer/main.py +406 -320
- mcp_code_indexer/middleware/error_middleware.py +106 -71
- mcp_code_indexer/query_preprocessor.py +40 -40
- mcp_code_indexer/server/mcp_server.py +785 -469
- mcp_code_indexer/token_counter.py +54 -47
- {mcp_code_indexer-3.1.3.dist-info → mcp_code_indexer-3.1.5.dist-info}/METADATA +3 -3
- mcp_code_indexer-3.1.5.dist-info/RECORD +37 -0
- mcp_code_indexer-3.1.3.dist-info/RECORD +0 -37
- {mcp_code_indexer-3.1.3.dist-info → mcp_code_indexer-3.1.5.dist-info}/WHEEL +0 -0
- {mcp_code_indexer-3.1.3.dist-info → mcp_code_indexer-3.1.5.dist-info}/entry_points.txt +0 -0
- {mcp_code_indexer-3.1.3.dist-info → mcp_code_indexer-3.1.5.dist-info}/licenses/LICENSE +0 -0
- {mcp_code_indexer-3.1.3.dist-info → mcp_code_indexer-3.1.5.dist-info}/top_level.txt +0 -0
mcp_code_indexer/server/mcp_server.py
@@ -6,7 +6,6 @@ for file description management tools.
 """
 
 import asyncio
-import hashlib
 import html
 import json
 import logging
@@ -14,7 +13,7 @@ import re
 import uuid
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 from mcp import types
 from mcp.server import Server
@@ -25,12 +24,15 @@ from mcp_code_indexer.database.database import DatabaseManager
 from mcp_code_indexer.file_scanner import FileScanner
 from mcp_code_indexer.token_counter import TokenCounter
 from mcp_code_indexer.database.models import (
-    Project,
-
-
+    Project,
+    FileDescription,
+    ProjectOverview,
+)
+from mcp_code_indexer.error_handler import setup_error_handling
+from mcp_code_indexer.middleware.error_middleware import (
+    create_tool_middleware,
+    AsyncTaskManager,
 )
-from mcp_code_indexer.error_handler import setup_error_handling, ErrorHandler
-from mcp_code_indexer.middleware.error_middleware import create_tool_middleware, AsyncTaskManager
 from mcp_code_indexer.logging_config import get_logger
 
 
@@ -40,11 +42,11 @@ logger = logging.getLogger(__name__)
 class MCPCodeIndexServer:
     """
     MCP Code Index Server.
-
+
     Provides file description tracking and codebase navigation tools
     through the Model Context Protocol.
     """
-
+
     def __init__(
         self,
         token_limit: int = 32000,
@@ -57,11 +59,11 @@ class MCPCodeIndexServer:
         health_check_interval: float = 30.0,
         retry_min_wait: float = 0.1,
         retry_max_wait: float = 2.0,
-        retry_jitter: float = 0.2
+        retry_jitter: float = 0.2,
     ):
         """
         Initialize the MCP Code Index Server.
-
+
         Args:
             token_limit: Maximum tokens before recommending search over overview
             db_path: Path to SQLite database
@@ -78,7 +80,7 @@ class MCPCodeIndexServer:
         self.token_limit = token_limit
         self.db_path = db_path or Path.home() / ".mcp-code-index" / "tracker.db"
         self.cache_dir = cache_dir or Path.home() / ".mcp-code-index" / "cache"
-
+
         # Store database configuration
         self.db_config = {
             "pool_size": db_pool_size,
@@ -88,12 +90,12 @@
             "health_check_interval": health_check_interval,
             "retry_min_wait": retry_min_wait,
             "retry_max_wait": retry_max_wait,
-            "retry_jitter": retry_jitter
+            "retry_jitter": retry_jitter,
         }
-
+
         # Initialize components
         self.db_manager = DatabaseManager(
-            db_path=self.db_path,
+            db_path=self.db_path,
             pool_size=db_pool_size,
             retry_count=db_retry_count,
             timeout=db_timeout,
@@ -101,58 +103,58 @@
             health_check_interval=health_check_interval,
             retry_min_wait=retry_min_wait,
             retry_max_wait=retry_max_wait,
-            retry_jitter=retry_jitter
+            retry_jitter=retry_jitter,
         )
         self.token_counter = TokenCounter(token_limit)
-
+
         # Setup error handling
         self.logger = get_logger(__name__)
         self.error_handler = setup_error_handling(self.logger)
         self.middleware = create_tool_middleware(self.error_handler)
         self.task_manager = AsyncTaskManager(self.error_handler)
-
+
         # Create MCP server
         self.server = Server("mcp-code-indexer")
-
+
         # Register handlers
         self._register_handlers()
-
+
         # Add debug logging for server events
         self.logger.debug("MCP server instance created and handlers registered")
-
+
         self.logger.info(
-            "MCP Code Index Server initialized",
-            extra={"structured_data": {"initialization": {"token_limit": token_limit}}}
+            "MCP Code Index Server initialized",
+            extra={"structured_data": {"initialization": {"token_limit": token_limit}}},
         )
-
+
     def _clean_html_entities(self, text: str) -> str:
         """
         Clean HTML entities from text to prevent encoding issues.
-
+
         Args:
             text: Text that may contain HTML entities
-
+
         Returns:
             Text with HTML entities decoded to proper characters
         """
         if not text:
             return text
-
+
         # Decode HTML entities like < > & etc.
         return html.unescape(text)
-
+
     def _clean_arguments(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
         """
         Clean HTML entities from all text arguments.
-
+
         Args:
             arguments: Dictionary of arguments to clean
-
+
         Returns:
             Dictionary with HTML entities decoded in all string values
         """
         cleaned = {}
-
+
         for key, value in arguments.items():
             if isinstance(value, str):
                 cleaned[key] = self._clean_html_entities(value)
@@ -168,19 +170,19 @@ class MCPCodeIndexServer:
             else:
                 # Pass through other types unchanged
                 cleaned[key] = value
-
+
         return cleaned
-
+
     def _parse_json_robust(self, json_str: str) -> Dict[str, Any]:
         """
         Parse JSON with automatic repair for common issues.
-
+
         Args:
             json_str: JSON string that may have formatting issues
-
+
         Returns:
             Parsed JSON dictionary
-
+
         Raises:
             ValueError: If JSON cannot be parsed even after repair attempts
         """
@@ -189,263 +191,474 @@ class MCPCodeIndexServer:
|
|
189
191
|
return json.loads(json_str)
|
190
192
|
except json.JSONDecodeError as original_error:
|
191
193
|
logger.warning(f"Initial JSON parse failed: {original_error}")
|
192
|
-
|
194
|
+
|
193
195
|
# Try to repair common issues
|
194
196
|
repaired = json_str
|
195
|
-
|
197
|
+
|
196
198
|
# Fix 1: Quote unquoted URLs and paths
|
197
199
|
# Look for patterns like: "key": http://... or "key": /path/...
|
198
200
|
url_pattern = r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9+.-]*://[^\s,}]+|/[^\s,}]*)'
|
199
201
|
repaired = re.sub(url_pattern, r'\1: "\2"', repaired)
|
200
|
-
|
202
|
+
|
201
203
|
# Fix 2: Quote unquoted boolean-like strings
|
202
|
-
# Look for: "key": true-ish-string or "key": false-ish-string
|
203
|
-
bool_pattern =
|
204
|
+
# Look for: "key": true-ish-string or "key": false-ish-string
|
205
|
+
bool_pattern = (
|
206
|
+
r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9_-]*[a-zA-Z0-9])(?=\s*[,}])'
|
207
|
+
)
|
204
208
|
repaired = re.sub(bool_pattern, r'\1: "\2"', repaired)
|
205
|
-
|
209
|
+
|
206
210
|
# Fix 3: Remove trailing commas
|
207
|
-
repaired = re.sub(r
|
208
|
-
|
211
|
+
repaired = re.sub(r",(\s*[}\]])", r"\1", repaired)
|
212
|
+
|
209
213
|
# Fix 4: Ensure proper string quoting for common unquoted values
|
210
214
|
# Handle cases like: "key": value (where value should be "value")
|
211
215
|
unquoted_pattern = r'("[\w]+"):\s*([a-zA-Z0-9_-]+)(?=\s*[,}])'
|
212
216
|
repaired = re.sub(unquoted_pattern, r'\1: "\2"', repaired)
|
213
|
-
|
217
|
+
|
214
218
|
try:
|
215
219
|
result = json.loads(repaired)
|
216
|
-
logger.info(
|
220
|
+
logger.info(
|
221
|
+
f"Successfully repaired JSON. Original: {json_str[:100]}..."
|
222
|
+
)
|
217
223
|
logger.info(f"Repaired: {repaired[:100]}...")
|
218
224
|
return result
|
219
225
|
except json.JSONDecodeError as repair_error:
|
220
226
|
logger.error(f"JSON repair failed. Original: {json_str}")
|
221
227
|
logger.error(f"Repaired attempt: {repaired}")
|
222
|
-
raise ValueError(
|
223
|
-
|
228
|
+
raise ValueError(
|
229
|
+
f"Could not parse JSON even after repair attempts. "
|
230
|
+
f"Original error: {original_error}, Repair error: {repair_error}"
|
231
|
+
)
|
232
|
+
|
224
233
|
async def initialize(self) -> None:
|
225
234
|
"""Initialize database and other resources."""
|
226
235
|
await self.db_manager.initialize()
|
227
236
|
logger.info("Server initialized successfully")
|
228
|
-
|
237
|
+
|
229
238
|
def _register_handlers(self) -> None:
|
230
239
|
"""Register MCP tool and resource handlers."""
|
231
|
-
|
240
|
+
|
232
241
|
@self.server.list_tools()
|
233
242
|
async def list_tools() -> List[types.Tool]:
|
234
243
|
"""Return list of available tools."""
|
235
244
|
return [
|
236
245
|
types.Tool(
|
237
246
|
name="get_file_description",
|
238
|
-
description=
|
247
|
+
description=(
|
248
|
+
"Retrieves the stored description for a specific file in a "
|
249
|
+
"codebase. Use this to quickly understand what a file "
|
250
|
+
"contains without reading its full contents."
|
251
|
+
),
|
239
252
|
inputSchema={
|
240
253
|
"type": "object",
|
241
254
|
"properties": {
|
242
255
|
"projectName": {
|
243
256
|
"type": "string",
|
244
|
-
"description": "The name of the project"
|
257
|
+
"description": "The name of the project",
|
245
258
|
},
|
246
259
|
"folderPath": {
|
247
|
-
"type": "string",
|
248
|
-
"description":
|
260
|
+
"type": "string",
|
261
|
+
"description": (
|
262
|
+
"Absolute path to the project folder on disk"
|
263
|
+
),
|
249
264
|
},
|
250
|
-
|
251
|
-
|
252
265
|
"filePath": {
|
253
266
|
"type": "string",
|
254
|
-
"description":
|
255
|
-
|
267
|
+
"description": (
|
268
|
+
"Relative path to the file from project root"
|
269
|
+
),
|
270
|
+
},
|
256
271
|
},
|
257
272
|
"required": ["projectName", "folderPath", "filePath"],
|
258
|
-
"additionalProperties": False
|
259
|
-
}
|
273
|
+
"additionalProperties": False,
|
274
|
+
},
|
260
275
|
),
|
261
276
|
types.Tool(
|
262
277
|
name="update_file_description",
|
263
|
-
description=
|
278
|
+
description=(
|
279
|
+
"Creates or updates the description for a file. Use this "
|
280
|
+
"after analyzing a file's contents to store a detailed summary."
|
281
|
+
),
|
264
282
|
inputSchema={
|
265
283
|
"type": "object",
|
266
284
|
"properties": {
|
267
|
-
"projectName": {
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
"
|
272
|
-
|
285
|
+
"projectName": {
|
286
|
+
"type": "string",
|
287
|
+
"description": "The name of the project",
|
288
|
+
},
|
289
|
+
"folderPath": {
|
290
|
+
"type": "string",
|
291
|
+
"description": (
|
292
|
+
"Absolute path to the project folder on disk"
|
293
|
+
),
|
294
|
+
},
|
295
|
+
"filePath": {
|
296
|
+
"type": "string",
|
297
|
+
"description": (
|
298
|
+
"Relative path to the file from project root"
|
299
|
+
),
|
300
|
+
},
|
301
|
+
"description": {
|
302
|
+
"type": "string",
|
303
|
+
"description": (
|
304
|
+
"Detailed description of the file's contents"
|
305
|
+
),
|
306
|
+
},
|
307
|
+
"fileHash": {
|
308
|
+
"type": "string",
|
309
|
+
"description": (
|
310
|
+
"SHA-256 hash of the file contents (optional)"
|
311
|
+
),
|
312
|
+
},
|
273
313
|
},
|
274
|
-
"required": [
|
275
|
-
|
276
|
-
|
314
|
+
"required": [
|
315
|
+
"projectName",
|
316
|
+
"folderPath",
|
317
|
+
"filePath",
|
318
|
+
"description",
|
319
|
+
],
|
320
|
+
"additionalProperties": False,
|
321
|
+
},
|
277
322
|
),
|
278
323
|
types.Tool(
|
279
324
|
name="check_codebase_size",
|
280
|
-
description=
|
325
|
+
description=(
|
326
|
+
"Checks the total token count of a codebase's file structure "
|
327
|
+
"and descriptions. Returns whether the codebase is 'large' "
|
328
|
+
"and recommends using search instead of the full overview."
|
329
|
+
),
|
281
330
|
inputSchema={
|
282
331
|
"type": "object",
|
283
332
|
"properties": {
|
284
|
-
"projectName": {
|
285
|
-
|
286
|
-
|
287
|
-
|
333
|
+
"projectName": {
|
334
|
+
"type": "string",
|
335
|
+
"description": "The name of the project",
|
336
|
+
},
|
337
|
+
"folderPath": {
|
338
|
+
"type": "string",
|
339
|
+
"description": (
|
340
|
+
"Absolute path to the project folder on disk"
|
341
|
+
),
|
342
|
+
},
|
343
|
+
"tokenLimit": {
|
344
|
+
"type": "integer",
|
345
|
+
"description": (
|
346
|
+
"Optional token limit override "
|
347
|
+
"(defaults to server configuration)"
|
348
|
+
),
|
349
|
+
},
|
288
350
|
},
|
289
351
|
"required": ["projectName", "folderPath"],
|
290
|
-
"additionalProperties": False
|
291
|
-
}
|
352
|
+
"additionalProperties": False,
|
353
|
+
},
|
292
354
|
),
|
293
355
|
types.Tool(
|
294
356
|
name="find_missing_descriptions",
|
295
|
-
description=
|
357
|
+
description=(
|
358
|
+
"Scans the project folder to find files that don't have "
|
359
|
+
"descriptions yet. Use update_file_description to add "
|
360
|
+
"descriptions for individual files."
|
361
|
+
),
|
296
362
|
inputSchema={
|
297
363
|
"type": "object",
|
298
364
|
"properties": {
|
299
|
-
"projectName": {
|
300
|
-
|
301
|
-
|
302
|
-
|
365
|
+
"projectName": {
|
366
|
+
"type": "string",
|
367
|
+
"description": "The name of the project",
|
368
|
+
},
|
369
|
+
"folderPath": {
|
370
|
+
"type": "string",
|
371
|
+
"description": (
|
372
|
+
"Absolute path to the project folder on disk"
|
373
|
+
),
|
374
|
+
},
|
375
|
+
"limit": {
|
376
|
+
"type": "integer",
|
377
|
+
"description": (
|
378
|
+
"Maximum number of missing files to return "
|
379
|
+
"(optional)"
|
380
|
+
),
|
381
|
+
},
|
303
382
|
},
|
304
383
|
"required": ["projectName", "folderPath"],
|
305
|
-
"additionalProperties": False
|
306
|
-
}
|
384
|
+
"additionalProperties": False,
|
385
|
+
},
|
307
386
|
),
|
308
387
|
types.Tool(
|
309
388
|
name="search_descriptions",
|
310
|
-
description=
|
389
|
+
description=(
|
390
|
+
"Searches through all file descriptions in a project to find "
|
391
|
+
"files related to specific functionality. Use this for large "
|
392
|
+
"codebases instead of loading the entire structure. Always "
|
393
|
+
"start with the fewest terms possible (1 to 3 words AT MOST); "
|
394
|
+
"if the tool returns a lot of results (more than 20) or the "
|
395
|
+
"results are not relevant, then narrow it down by increasing "
|
396
|
+
"the number of search words one at a time and calling the tool "
|
397
|
+
"again. Start VERY broad, then narrow the focus only if needed!"
|
398
|
+
),
|
311
399
|
inputSchema={
|
312
400
|
"type": "object",
|
313
401
|
"properties": {
|
314
|
-
"projectName": {
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
"
|
402
|
+
"projectName": {
|
403
|
+
"type": "string",
|
404
|
+
"description": "The name of the project",
|
405
|
+
},
|
406
|
+
"folderPath": {
|
407
|
+
"type": "string",
|
408
|
+
"description": (
|
409
|
+
"Absolute path to the project folder on disk"
|
410
|
+
),
|
411
|
+
},
|
412
|
+
"query": {
|
413
|
+
"type": "string",
|
414
|
+
"description": (
|
415
|
+
"Search query (e.g., 'authentication middleware', "
|
416
|
+
"'database models')"
|
417
|
+
),
|
418
|
+
},
|
419
|
+
"maxResults": {
|
420
|
+
"type": "integer",
|
421
|
+
"default": 20,
|
422
|
+
"description": "Maximum number of results to return",
|
423
|
+
},
|
319
424
|
},
|
320
425
|
"required": ["projectName", "folderPath", "query"],
|
321
|
-
"additionalProperties": False
|
322
|
-
}
|
426
|
+
"additionalProperties": False,
|
427
|
+
},
|
323
428
|
),
|
324
429
|
types.Tool(
|
325
430
|
name="get_all_descriptions",
|
326
|
-
description=
|
431
|
+
description=(
|
432
|
+
"Returns the complete file-by-file structure of a codebase "
|
433
|
+
"with individual descriptions for each file. For large "
|
434
|
+
"codebases, consider using get_codebase_overview for a "
|
435
|
+
"condensed summary instead."
|
436
|
+
),
|
327
437
|
inputSchema={
|
328
438
|
"type": "object",
|
329
439
|
"properties": {
|
330
|
-
"projectName": {
|
331
|
-
|
440
|
+
"projectName": {
|
441
|
+
"type": "string",
|
442
|
+
"description": "The name of the project",
|
443
|
+
},
|
444
|
+
"folderPath": {
|
445
|
+
"type": "string",
|
446
|
+
"description": (
|
447
|
+
"Absolute path to the project folder on disk"
|
448
|
+
),
|
449
|
+
},
|
332
450
|
},
|
333
451
|
"required": ["projectName", "folderPath"],
|
334
|
-
"additionalProperties": False
|
335
|
-
}
|
452
|
+
"additionalProperties": False,
|
453
|
+
},
|
336
454
|
),
|
337
455
|
types.Tool(
|
338
456
|
name="get_codebase_overview",
|
339
|
-
description=
|
457
|
+
description=(
|
458
|
+
"Returns a condensed, interpretive overview of the entire "
|
459
|
+
"codebase. This is a single comprehensive narrative that "
|
460
|
+
"captures the architecture, key components, relationships, "
|
461
|
+
"and design patterns. Unlike get_all_descriptions which "
|
462
|
+
"lists every file, this provides a holistic view suitable "
|
463
|
+
"for understanding the codebase's structure and purpose. "
|
464
|
+
"If no overview exists, returns empty string."
|
465
|
+
),
|
340
466
|
inputSchema={
|
341
467
|
"type": "object",
|
342
468
|
"properties": {
|
343
|
-
"projectName": {
|
344
|
-
|
469
|
+
"projectName": {
|
470
|
+
"type": "string",
|
471
|
+
"description": "The name of the project",
|
472
|
+
},
|
473
|
+
"folderPath": {
|
474
|
+
"type": "string",
|
475
|
+
"description": (
|
476
|
+
"Absolute path to the project folder on disk"
|
477
|
+
),
|
478
|
+
},
|
345
479
|
},
|
346
480
|
"required": ["projectName", "folderPath"],
|
347
|
-
"additionalProperties": False
|
348
|
-
}
|
481
|
+
"additionalProperties": False,
|
482
|
+
},
|
349
483
|
),
|
350
484
|
types.Tool(
|
351
485
|
name="update_codebase_overview",
|
352
|
-
description=
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
486
|
+
description=(
|
487
|
+
"Creates a concise codebase overview for AI agents. Focus on "
|
488
|
+
"essential navigation and context in 3500-7000 words. Include: "
|
489
|
+
"(1) One-paragraph system summary - what it does and its core "
|
490
|
+
"purpose, (2) Directory tree with one-line descriptions for "
|
491
|
+
"each major folder, (3) Key architectural patterns (e.g., MVC, "
|
492
|
+
"microservices, event-driven) in 2-3 sentences, (4) Critical "
|
493
|
+
"file locations (entry points, config, main business logic), "
|
494
|
+
"(5) Essential conventions (naming, file organization, error "
|
495
|
+
"handling), (6) Important gotchas or non-obvious connections. "
|
496
|
+
"Keep it scannable and action-oriented.\n\n"
|
497
|
+
"Example:\n\n"
|
498
|
+
"````\n"
|
499
|
+
"## System Summary\n"
|
500
|
+
"E-commerce platform handling product catalog, orders, "
|
501
|
+
"and payments with React frontend and Node.js API.\n\n"
|
502
|
+
"## Directory Structure\n"
|
503
|
+
"```\n"
|
504
|
+
"src/\n"
|
505
|
+
"├── api/ # REST endpoints "
|
506
|
+
"(auth in auth.js, orders in orders/)\n"
|
507
|
+
"├── models/ # Sequelize models "
|
508
|
+
"(User, Product, Order)\n"
|
509
|
+
"├── services/ # Stripe (payments/), "
|
510
|
+
"SendGrid (email/)\n"
|
511
|
+
"├── client/ # React app "
|
512
|
+
"(components/, pages/, hooks/)\n"
|
513
|
+
"└── shared/ # Types and constants used "
|
514
|
+
"by both API and client\n"
|
515
|
+
"```\n\n"
|
516
|
+
"## Architecture\n"
|
517
|
+
"RESTful API with JWT auth. React frontend calls API. "
|
518
|
+
"Background jobs via Bull queue. PostgreSQL with "
|
519
|
+
"Sequelize ORM.\n\n"
|
520
|
+
"## Key Files\n"
|
521
|
+
"- Entry: `src/index.js` "
|
522
|
+
"(starts Express server)\n"
|
523
|
+
"- Config: `src/config/` "
|
524
|
+
"(env-specific settings)\n"
|
525
|
+
"- Routes: `src/api/routes.js` "
|
526
|
+
"(all endpoints defined here)\n"
|
527
|
+
"- Auth: `src/middleware/auth.js` "
|
528
|
+
"(JWT validation)\n\n"
|
529
|
+
"## Conventions\n"
|
530
|
+
"- Files named `[entity].service.js` "
|
531
|
+
"handle business logic\n"
|
532
|
+
"- All API routes return "
|
533
|
+
"`{ success: boolean, data?: any, error?: string }`\n"
|
534
|
+
"- Database migrations in `migrations/` - "
|
535
|
+
"run before adding models\n\n"
|
536
|
+
"## Important Notes\n"
|
537
|
+
"- Payment webhooks MUST be idempotent "
|
538
|
+
"(check `processedWebhooks` table)\n"
|
539
|
+
"- User emails are case-insensitive "
|
540
|
+
"(lowercase in DB)\n"
|
541
|
+
"- Order status transitions enforced in "
|
542
|
+
"`Order.beforeUpdate` hook\n"
|
543
|
+
"````"
|
544
|
+
),
|
387
545
|
inputSchema={
|
388
546
|
"type": "object",
|
389
547
|
"properties": {
|
390
|
-
"projectName": {
|
391
|
-
|
392
|
-
|
393
|
-
|
548
|
+
"projectName": {
|
549
|
+
"type": "string",
|
550
|
+
"description": "The name of the project",
|
551
|
+
},
|
552
|
+
"folderPath": {
|
553
|
+
"type": "string",
|
554
|
+
"description": (
|
555
|
+
"Absolute path to the project folder on disk"
|
556
|
+
),
|
557
|
+
},
|
558
|
+
"overview": {
|
559
|
+
"type": "string",
|
560
|
+
"description": (
|
561
|
+
"Concise codebase overview "
|
562
|
+
"(aim for 3500-7500 words / 5k-10k tokens)"
|
563
|
+
),
|
564
|
+
},
|
394
565
|
},
|
395
566
|
"required": ["projectName", "folderPath", "overview"],
|
396
|
-
"additionalProperties": False
|
397
|
-
}
|
567
|
+
"additionalProperties": False,
|
568
|
+
},
|
398
569
|
),
|
399
570
|
types.Tool(
|
400
571
|
name="get_word_frequency",
|
401
|
-
description=
|
572
|
+
description=(
|
573
|
+
"Analyzes all file descriptions to find the most frequently "
|
574
|
+
"used technical terms. Filters out common English stop words "
|
575
|
+
"and symbols, returning the top 200 meaningful terms. Useful "
|
576
|
+
"for understanding the codebase's domain vocabulary and "
|
577
|
+
"finding all functions/files related to specific concepts."
|
578
|
+
),
|
402
579
|
inputSchema={
|
403
580
|
"type": "object",
|
404
581
|
"properties": {
|
405
|
-
"projectName": {
|
406
|
-
|
407
|
-
|
408
|
-
|
582
|
+
"projectName": {
|
583
|
+
"type": "string",
|
584
|
+
"description": "The name of the project",
|
585
|
+
},
|
586
|
+
"folderPath": {
|
587
|
+
"type": "string",
|
588
|
+
"description": (
|
589
|
+
"Absolute path to the project folder on disk"
|
590
|
+
),
|
591
|
+
},
|
592
|
+
"limit": {
|
593
|
+
"type": "integer",
|
594
|
+
"default": 200,
|
595
|
+
"description": "Number of top terms to return",
|
596
|
+
},
|
409
597
|
},
|
410
598
|
"required": ["projectName", "folderPath"],
|
411
|
-
"additionalProperties": False
|
412
|
-
}
|
599
|
+
"additionalProperties": False,
|
600
|
+
},
|
413
601
|
),
|
414
602
|
types.Tool(
|
415
603
|
name="check_database_health",
|
416
|
-
description=
|
604
|
+
description=(
|
605
|
+
"Perform health diagnostics for the MCP Code Indexer's SQLite "
|
606
|
+
"database and connection pool. Returns database resilience "
|
607
|
+
"metrics, connection pool status, WAL mode performance, and "
|
608
|
+
"file description storage statistics for monitoring the code "
|
609
|
+
"indexer's database locking improvements."
|
610
|
+
),
|
417
611
|
inputSchema={
|
418
612
|
"type": "object",
|
419
613
|
"properties": {},
|
420
|
-
"additionalProperties": False
|
421
|
-
}
|
614
|
+
"additionalProperties": False,
|
615
|
+
},
|
422
616
|
),
|
423
617
|
types.Tool(
|
424
618
|
name="search_codebase_overview",
|
425
|
-
description=
|
619
|
+
description=(
|
620
|
+
"Search for a single word in the codebase overview and return "
|
621
|
+
"2 sentences before and after where the word is found. Useful "
|
622
|
+
"for quickly finding specific information in large overviews."
|
623
|
+
),
|
426
624
|
inputSchema={
|
427
625
|
"type": "object",
|
428
626
|
"properties": {
|
429
|
-
"projectName": {
|
430
|
-
|
431
|
-
|
432
|
-
|
627
|
+
"projectName": {
|
628
|
+
"type": "string",
|
629
|
+
"description": "The name of the project",
|
630
|
+
},
|
631
|
+
"folderPath": {
|
632
|
+
"type": "string",
|
633
|
+
"description": (
|
634
|
+
"Absolute path to the project folder on disk"
|
635
|
+
),
|
636
|
+
},
|
637
|
+
"searchWord": {
|
638
|
+
"type": "string",
|
639
|
+
"description": (
|
640
|
+
"Single word to search for in the overview"
|
641
|
+
),
|
642
|
+
},
|
433
643
|
},
|
434
644
|
"required": ["projectName", "folderPath", "searchWord"],
|
435
|
-
"additionalProperties": False
|
436
|
-
}
|
437
|
-
)
|
645
|
+
"additionalProperties": False,
|
646
|
+
},
|
647
|
+
),
|
438
648
|
]
|
439
|
-
|
649
|
+
|
440
650
|
@self.server.call_tool()
|
441
|
-
async def call_tool(
|
651
|
+
async def call_tool(
|
652
|
+
name: str, arguments: Dict[str, Any]
|
653
|
+
) -> List[types.TextContent]:
|
442
654
|
"""Handle tool calls with middleware."""
|
443
655
|
import time
|
656
|
+
|
444
657
|
start_time = time.time()
|
445
|
-
|
658
|
+
|
446
659
|
logger.info(f"=== MCP Tool Call: {name} ===")
|
447
660
|
logger.info(f"Arguments: {', '.join(arguments.keys())}")
|
448
|
-
|
661
|
+
|
449
662
|
# Map tool names to handler methods
|
450
663
|
tool_handlers = {
|
451
664
|
"get_file_description": self._handle_get_file_description,
|
@@ -457,67 +670,69 @@ src/
                 "get_codebase_overview": self._handle_get_condensed_overview,
                 "update_codebase_overview": self._handle_update_codebase_overview,
                 "get_word_frequency": self._handle_get_word_frequency,
-
                 "check_database_health": self._handle_check_database_health,
                 "search_codebase_overview": self._handle_search_codebase_overview,
             }
-
+
             if name not in tool_handlers:
                 logger.error(f"Unknown tool requested: {name}")
                 from ..error_handler import ValidationError
+
                 raise ValidationError(f"Unknown tool: {name}")
-
+
             # Wrap handler with middleware
             wrapped_handler = self.middleware.wrap_tool_handler(name)(
                 lambda args: self._execute_tool_handler(tool_handlers[name], args)
             )
-
+
             try:
                 result = await wrapped_handler(arguments)
-
+
                 elapsed_time = time.time() - start_time
-                logger.info(
-
+                logger.info(
+                    f"MCP Tool '{name}' completed successfully in {elapsed_time:.2f}s"
+                )
+
                 return result
             except Exception as e:
                 elapsed_time = time.time() - start_time
                 logger.error(f"MCP Tool '{name}' failed after {elapsed_time:.2f}s: {e}")
                 logger.error(f"Exception details: {type(e).__name__}: {str(e)}")
                 raise
-
-    async def _execute_tool_handler(
+
+    async def _execute_tool_handler(
+        self, handler, arguments: Dict[str, Any]
+    ) -> List[types.TextContent]:
         """Execute a tool handler and format the result."""
         # Clean HTML entities from all arguments before processing
         cleaned_arguments = self._clean_arguments(arguments)
-
+
         result = await handler(cleaned_arguments)
-
-        return [
-
-
-
-
+
+        return [
+            types.TextContent(
+                type="text", text=json.dumps(result, indent=2, default=str)
+            )
+        ]
+
     async def _get_or_create_project_id(self, arguments: Dict[str, Any]) -> str:
         """
         Get or create a project ID using intelligent matching.
-
+
         Matches projects based on identification factors:
         1. Project name (normalized, case-insensitive)
         2. Folder path in aliases
-
+
         Projects are now identified primarily by name without git coupling.
         """
         project_name = arguments["projectName"]
         folder_path = arguments["folderPath"]
 
-
         # Normalize project name for case-insensitive matching
         normalized_name = project_name.lower()
-
+
         # Find potential project matches
-        project = await self._find_matching_project(
-            normalized_name, folder_path
-        )
+        project = await self._find_matching_project(normalized_name, folder_path)
         if project:
             # Update project metadata and aliases
             await self._update_existing_project(project, normalized_name, folder_path)
@@ -529,63 +744,72 @@ src/
                 name=normalized_name,
                 aliases=[folder_path],
                 created=datetime.utcnow(),
-                last_accessed=datetime.utcnow()
+                last_accessed=datetime.utcnow(),
             )
             await self.db_manager.create_project(project)
             logger.info(f"Created new project: {normalized_name} ({project_id})")
-
+
         return project.id
-
+
     async def _find_matching_project(
-        self,
-        normalized_name: str,
-        folder_path: str
+        self, normalized_name: str, folder_path: str
     ) -> Optional[Project]:
         """
         Find a matching project using name and folder path matching.
-
+
         Returns the best matching project or None if no sufficient match is found.
         """
         all_projects = await self.db_manager.get_all_projects()
-
+
         best_match = None
         best_score = 0
-
+
         for project in all_projects:
             score = 0
             match_factors = []
-
+
             # Factor 1: Project name match (primary identifier)
             if project.name.lower() == normalized_name:
                 score += 2  # Higher weight for name match
                 match_factors.append("name")
-
+
             # Factor 2: Folder path in aliases
-            project_aliases =
+            project_aliases = (
+                json.loads(project.aliases)
+                if isinstance(project.aliases, str)
+                else project.aliases
+            )
             if folder_path in project_aliases:
                 score += 1
                 match_factors.append("folder_path")
-
+
             # If we have a name match, it's a strong candidate
             if score >= 2:
                 if score > best_score:
                     best_score = score
                     best_match = project
-                    logger.info(
-
+                    logger.info(
+                        f"Match for project {project.name} "
+                        f"(score: {score}, factors: {match_factors})"
+                    )
+
             # If only name matches, check file similarity for potential matches
             elif score == 1 and "name" in match_factors:
                 if await self._check_file_similarity(project, folder_path):
-                    logger.info(
+                    logger.info(
+                        f"File similarity match for project {project.name} "
+                        f"(factor: {match_factors[0]})"
+                    )
                     if score > best_score:
                         best_score = score
                         best_match = project
-
+
         return best_match
-
+
     async def _check_file_similarity(self, project: Project, folder_path: str) -> bool:
         """
-        Check if the files in the folder are similar to files already indexed
+        Check if the files in the folder are similar to files already indexed
+        for this project.
         Returns True if 80%+ of files match.
         """
         try:
@@ -593,163 +817,182 @@ src/
|
|
593
817
|
scanner = FileScanner(Path(folder_path))
|
594
818
|
if not scanner.is_valid_project_directory():
|
595
819
|
return False
|
596
|
-
|
820
|
+
|
597
821
|
current_files = scanner.scan_directory()
|
598
822
|
current_basenames = {f.name for f in current_files}
|
599
|
-
|
823
|
+
|
600
824
|
if not current_basenames:
|
601
825
|
return False
|
602
|
-
|
826
|
+
|
603
827
|
# Get files already indexed for this project
|
604
828
|
indexed_files = await self.db_manager.get_all_file_descriptions(project.id)
|
605
829
|
indexed_basenames = {Path(fd.file_path).name for fd in indexed_files}
|
606
|
-
|
830
|
+
|
607
831
|
if not indexed_basenames:
|
608
832
|
return False
|
609
|
-
|
833
|
+
|
610
834
|
# Calculate similarity
|
611
835
|
intersection = current_basenames & indexed_basenames
|
612
836
|
similarity = len(intersection) / len(current_basenames)
|
613
|
-
|
614
|
-
logger.debug(
|
615
|
-
|
837
|
+
|
838
|
+
logger.debug(
|
839
|
+
f"File similarity for {project.name}: {similarity:.2%} "
|
840
|
+
f"({len(intersection)}/{len(current_basenames)} files match)"
|
841
|
+
)
|
842
|
+
|
616
843
|
return similarity >= 0.8
|
617
844
|
except Exception as e:
|
618
845
|
logger.warning(f"Error checking file similarity: {e}")
|
619
846
|
return False
|
620
|
-
|
847
|
+
|
621
848
|
async def _update_existing_project(
|
622
|
-
self,
|
623
|
-
project: Project,
|
624
|
-
normalized_name: str,
|
625
|
-
folder_path: str
|
849
|
+
self, project: Project, normalized_name: str, folder_path: str
|
626
850
|
) -> None:
|
627
851
|
"""Update an existing project with new metadata and folder alias."""
|
628
852
|
# Update last accessed time
|
629
853
|
await self.db_manager.update_project_access_time(project.id)
|
630
|
-
|
854
|
+
|
631
855
|
should_update = False
|
632
|
-
|
856
|
+
|
633
857
|
# Update name if different
|
634
858
|
if project.name != normalized_name:
|
635
859
|
project.name = normalized_name
|
636
860
|
should_update = True
|
637
|
-
|
861
|
+
|
638
862
|
# Add folder path to aliases if not already present
|
639
|
-
project_aliases =
|
863
|
+
project_aliases = (
|
864
|
+
json.loads(project.aliases)
|
865
|
+
if isinstance(project.aliases, str)
|
866
|
+
else project.aliases
|
867
|
+
)
|
640
868
|
if folder_path not in project_aliases:
|
641
869
|
project_aliases.append(folder_path)
|
642
870
|
project.aliases = project_aliases
|
643
871
|
should_update = True
|
644
|
-
logger.info(
|
645
|
-
|
872
|
+
logger.info(
|
873
|
+
f"Added new folder alias to project {project.name}: {folder_path}"
|
874
|
+
)
|
875
|
+
|
646
876
|
if should_update:
|
647
877
|
await self.db_manager.update_project(project)
|
648
878
|
logger.debug(f"Updated project metadata for {project.name}")
|
649
|
-
|
650
879
|
|
651
|
-
|
652
|
-
|
880
|
+
async def _handle_get_file_description(
|
881
|
+
self, arguments: Dict[str, Any]
|
882
|
+
) -> Dict[str, Any]:
|
653
883
|
"""Handle get_file_description tool calls."""
|
654
884
|
project_id = await self._get_or_create_project_id(arguments)
|
655
|
-
|
885
|
+
|
656
886
|
file_desc = await self.db_manager.get_file_description(
|
657
|
-
project_id=project_id,
|
658
|
-
file_path=arguments["filePath"]
|
887
|
+
project_id=project_id, file_path=arguments["filePath"]
|
659
888
|
)
|
660
|
-
|
889
|
+
|
661
890
|
if file_desc:
|
662
891
|
return {
|
663
892
|
"exists": True,
|
664
893
|
"description": file_desc.description,
|
665
894
|
"lastModified": file_desc.last_modified.isoformat(),
|
666
895
|
"fileHash": file_desc.file_hash,
|
667
|
-
"version": file_desc.version
|
896
|
+
"version": file_desc.version,
|
668
897
|
}
|
669
898
|
else:
|
670
899
|
return {
|
671
900
|
"exists": False,
|
672
|
-
"message": f"No description found for {arguments['filePath']}"
|
901
|
+
"message": f"No description found for {arguments['filePath']}",
|
673
902
|
}
|
674
|
-
|
675
|
-
async def _handle_update_file_description(
|
903
|
+
|
904
|
+
async def _handle_update_file_description(
|
905
|
+
self, arguments: Dict[str, Any]
|
906
|
+
) -> Dict[str, Any]:
|
676
907
|
"""Handle update_file_description tool calls."""
|
677
908
|
logger.info(f"Updating file description for: {arguments['filePath']}")
|
678
909
|
logger.info(f"Project: {arguments.get('projectName', 'Unknown')}")
|
679
|
-
|
910
|
+
|
680
911
|
description_length = len(arguments.get("description", ""))
|
681
912
|
logger.info(f"Description length: {description_length} characters")
|
682
|
-
|
913
|
+
|
683
914
|
project_id = await self._get_or_create_project_id(arguments)
|
684
|
-
|
915
|
+
|
685
916
|
logger.info(f"Resolved project_id: {project_id}")
|
686
|
-
|
917
|
+
|
687
918
|
file_desc = FileDescription(
|
688
919
|
project_id=project_id,
|
689
920
|
file_path=arguments["filePath"],
|
690
921
|
description=arguments["description"],
|
691
922
|
file_hash=arguments.get("fileHash"),
|
692
923
|
last_modified=datetime.utcnow(),
|
693
|
-
version=1
|
924
|
+
version=1,
|
694
925
|
)
|
695
|
-
|
926
|
+
|
696
927
|
await self.db_manager.create_file_description(file_desc)
|
697
|
-
|
928
|
+
|
698
929
|
logger.info(f"Successfully updated description for: {arguments['filePath']}")
|
699
|
-
|
930
|
+
|
700
931
|
return {
|
701
932
|
"success": True,
|
702
933
|
"message": f"Description updated for {arguments['filePath']}",
|
703
934
|
"filePath": arguments["filePath"],
|
704
|
-
"lastModified": file_desc.last_modified.isoformat()
|
935
|
+
"lastModified": file_desc.last_modified.isoformat(),
|
705
936
|
}
|
706
|
-
|
707
|
-
async def _handle_check_codebase_size(
|
937
|
+
|
938
|
+
async def _handle_check_codebase_size(
|
939
|
+
self, arguments: Dict[str, Any]
|
940
|
+
) -> Dict[str, Any]:
|
708
941
|
"""Handle check_codebase_size tool calls."""
|
709
|
-
logger.info(
|
942
|
+
logger.info(
|
943
|
+
f"Checking codebase size for: {arguments.get('projectName', 'Unknown')}"
|
944
|
+
)
|
710
945
|
logger.info(f"Folder path: {arguments.get('folderPath', 'Unknown')}")
|
711
|
-
|
946
|
+
|
712
947
|
project_id = await self._get_or_create_project_id(arguments)
|
713
948
|
folder_path = Path(arguments["folderPath"])
|
714
|
-
|
949
|
+
|
715
950
|
logger.info(f"Resolved project_id: {project_id}")
|
716
|
-
|
951
|
+
|
717
952
|
# Clean up descriptions for files that no longer exist
|
718
953
|
logger.info("Cleaning up descriptions for missing files...")
|
719
954
|
cleaned_up_files = await self.db_manager.cleanup_missing_files(
|
720
|
-
project_id=project_id,
|
721
|
-
project_root=folder_path
|
955
|
+
project_id=project_id, project_root=folder_path
|
722
956
|
)
|
723
957
|
logger.info(f"Cleaned up {len(cleaned_up_files)} missing files")
|
724
|
-
|
958
|
+
|
725
959
|
# Get file descriptions for this project (after cleanup)
|
726
960
|
logger.info("Retrieving file descriptions...")
|
727
961
|
file_descriptions = await self.db_manager.get_all_file_descriptions(
|
728
962
|
project_id=project_id
|
729
963
|
)
|
730
964
|
logger.info(f"Found {len(file_descriptions)} file descriptions")
|
731
|
-
|
965
|
+
|
732
966
|
# Use provided token limit or fall back to server default
|
733
967
|
token_limit = arguments.get("tokenLimit", self.token_limit)
|
734
|
-
|
968
|
+
|
735
969
|
# Calculate total tokens for descriptions
|
736
970
|
logger.info("Calculating total token count...")
|
737
|
-
descriptions_tokens = self.token_counter.calculate_codebase_tokens(
|
738
|
-
|
971
|
+
descriptions_tokens = self.token_counter.calculate_codebase_tokens(
|
972
|
+
file_descriptions
|
973
|
+
)
|
974
|
+
|
739
975
|
# Get overview tokens if available
|
740
976
|
overview = await self.db_manager.get_project_overview(project_id)
|
741
977
|
overview_tokens = 0
|
742
978
|
if overview and overview.overview:
|
743
979
|
overview_tokens = self.token_counter.count_tokens(overview.overview)
|
744
|
-
|
980
|
+
|
745
981
|
total_tokens = descriptions_tokens + overview_tokens
|
746
982
|
is_large = total_tokens > token_limit
|
747
983
|
recommendation = "use_search" if is_large else "use_overview"
|
748
|
-
|
749
|
-
logger.info(
|
750
|
-
|
984
|
+
|
985
|
+
logger.info(
|
986
|
+
f"Codebase analysis complete: {total_tokens} tokens total "
|
987
|
+
f"({descriptions_tokens} descriptions + {overview_tokens} overview), "
|
988
|
+
f"{len(file_descriptions)} files"
|
989
|
+
)
|
990
|
+
logger.info(
|
991
|
+
f"Size assessment: {'LARGE' if is_large else 'SMALL'} "
|
992
|
+
f"(limit: {token_limit})"
|
993
|
+
)
|
751
994
|
logger.info(f"Recommendation: {recommendation}")
|
752
|
-
|
995
|
+
|
753
996
|
return {
|
754
997
|
"totalTokens": total_tokens,
|
755
998
|
"descriptionsTokens": descriptions_tokens,
|
@@ -759,19 +1002,24 @@ src/
             "tokenLimit": token_limit,
             "totalFiles": len(file_descriptions),
             "cleanedUpFiles": cleaned_up_files,
-            "cleanedUpCount": len(cleaned_up_files)
+            "cleanedUpCount": len(cleaned_up_files),
         }
-
-    async def _handle_find_missing_descriptions(
+
+    async def _handle_find_missing_descriptions(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """Handle find_missing_descriptions tool calls."""
-        logger.info(
+        logger.info(
+            f"Finding missing descriptions for: "
+            f"{arguments.get('projectName', 'Unknown')}"
+        )
         logger.info(f"Folder path: {arguments.get('folderPath', 'Unknown')}")
-
+
         project_id = await self._get_or_create_project_id(arguments)
         folder_path = Path(arguments["folderPath"])
-
+
         logger.info(f"Resolved project_id: {project_id}")
-
+
         # Get existing file descriptions
         logger.info("Retrieving existing file descriptions...")
         existing_descriptions = await self.db_manager.get_all_file_descriptions(
@@ -779,7 +1027,7 @@ src/
         )
         existing_paths = {desc.file_path for desc in existing_descriptions}
         logger.info(f"Found {len(existing_paths)} existing descriptions")
-
+
         # Scan directory for files
         logger.info(f"Scanning project directory: {folder_path}")
         scanner = FileScanner(folder_path)
@@ -788,110 +1036,116 @@ src/
|
|
788
1036
|
return {
|
789
1037
|
"error": f"Invalid or inaccessible project directory: {folder_path}"
|
790
1038
|
}
|
791
|
-
|
1039
|
+
|
792
1040
|
missing_files = scanner.find_missing_files(existing_paths)
|
793
1041
|
missing_paths = [scanner.get_relative_path(f) for f in missing_files]
|
794
|
-
|
1042
|
+
|
795
1043
|
logger.info(f"Found {len(missing_paths)} files without descriptions")
|
796
|
-
|
1044
|
+
|
797
1045
|
# Apply limit if specified
|
798
1046
|
limit = arguments.get("limit")
|
799
1047
|
total_missing = len(missing_paths)
|
800
1048
|
if limit is not None and isinstance(limit, int) and limit > 0:
|
801
1049
|
missing_paths = missing_paths[:limit]
|
802
1050
|
logger.info(f"Applied limit {limit}, returning {len(missing_paths)} files")
|
803
|
-
|
1051
|
+
|
804
1052
|
# Get project stats
|
805
1053
|
stats = scanner.get_project_stats()
|
806
1054
|
logger.info(f"Project stats: {stats.get('total_files', 0)} total files")
|
807
|
-
|
1055
|
+
|
808
1056
|
return {
|
809
1057
|
"missingFiles": missing_paths,
|
810
1058
|
"totalMissing": total_missing,
|
811
1059
|
"returnedCount": len(missing_paths),
|
812
1060
|
"existingDescriptions": len(existing_paths),
|
813
|
-
"projectStats": stats
|
1061
|
+
"projectStats": stats,
|
814
1062
|
}
|
815
|
-
|
816
|
-
async def _handle_search_descriptions(
|
1063
|
+
|
1064
|
+
async def _handle_search_descriptions(
|
1065
|
+
self, arguments: Dict[str, Any]
|
1066
|
+
) -> Dict[str, Any]:
|
817
1067
|
"""Handle search_descriptions tool calls."""
|
818
1068
|
project_id = await self._get_or_create_project_id(arguments)
|
819
1069
|
max_results = arguments.get("maxResults", 20)
|
820
|
-
|
1070
|
+
|
821
1071
|
# Perform search
|
822
1072
|
search_results = await self.db_manager.search_file_descriptions(
|
823
|
-
project_id=project_id,
|
824
|
-
query=arguments["query"],
|
825
|
-
max_results=max_results
|
1073
|
+
project_id=project_id, query=arguments["query"], max_results=max_results
|
826
1074
|
)
|
827
|
-
|
1075
|
+
|
828
1076
|
# Format results
|
829
1077
|
formatted_results = []
|
830
1078
|
for result in search_results:
|
831
|
-
formatted_results.append(
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
1079
|
+
formatted_results.append(
|
1080
|
+
{
|
1081
|
+
"filePath": result.file_path,
|
1082
|
+
"description": result.description,
|
1083
|
+
"relevanceScore": result.relevance_score,
|
1084
|
+
}
|
1085
|
+
)
|
1086
|
+
|
837
1087
|
return {
|
838
1088
|
"results": formatted_results,
|
839
1089
|
"totalResults": len(formatted_results),
|
840
1090
|
"query": arguments["query"],
|
841
|
-
"maxResults": max_results
|
1091
|
+
"maxResults": max_results,
|
842
1092
|
}
|
843
|
-
|
844
|
-
async def _handle_get_codebase_overview(
|
1093
|
+
|
1094
|
+
async def _handle_get_codebase_overview(
|
1095
|
+
self, arguments: Dict[str, Any]
|
1096
|
+
) -> Dict[str, Any]:
|
845
1097
|
"""Handle get_codebase_overview tool calls."""
|
846
1098
|
project_id = await self._get_or_create_project_id(arguments)
|
847
|
-
|
1099
|
+
|
848
1100
|
# Get all file descriptions
|
849
1101
|
file_descriptions = await self.db_manager.get_all_file_descriptions(
|
850
1102
|
project_id=project_id
|
851
1103
|
)
|
852
|
-
|
1104
|
+
|
853
1105
|
# Calculate total tokens
|
854
1106
|
total_tokens = self.token_counter.calculate_codebase_tokens(file_descriptions)
|
855
1107
|
is_large = self.token_counter.is_large_codebase(total_tokens)
|
856
|
-
|
857
|
-
# Always build and return the folder structure - if the AI called this
|
1108
|
+
|
1109
|
+
# Always build and return the folder structure - if the AI called this
|
1110
|
+
# tool, it wants the overview
|
858
1111
|
structure = self._build_folder_structure(file_descriptions)
|
859
|
-
|
1112
|
+
|
860
1113
|
return {
|
861
1114
|
"projectName": arguments["projectName"],
|
862
1115
|
"totalFiles": len(file_descriptions),
|
863
1116
|
"totalTokens": total_tokens,
|
864
1117
|
"isLarge": is_large,
|
865
1118
|
"tokenLimit": self.token_counter.token_limit,
|
866
|
-
"structure": structure
|
1119
|
+
"structure": structure,
|
867
1120
|
}
|
868
|
-
|
869
|
-
def _build_folder_structure(
|
1121
|
+
|
1122
|
+
def _build_folder_structure(
|
1123
|
+
self, file_descriptions: List[FileDescription]
|
1124
|
+
) -> Dict[str, Any]:
|
870
1125
|
"""Build hierarchical folder structure from file descriptions."""
|
871
1126
|
root = {"path": "", "files": [], "folders": {}}
|
872
|
-
|
1127
|
+
|
873
1128
|
for file_desc in file_descriptions:
|
874
1129
|
path_parts = Path(file_desc.file_path).parts
|
875
1130
|
current = root
|
876
|
-
|
1131
|
+
|
877
1132
|
# Navigate/create folder structure
|
878
1133
|
for i, part in enumerate(path_parts[:-1]):
|
879
|
-
folder_path = "/".join(path_parts[:i+1])
|
1134
|
+
folder_path = "/".join(path_parts[: i + 1])
|
880
1135
|
if part not in current["folders"]:
|
881
1136
|
current["folders"][part] = {
|
882
1137
|
"path": folder_path,
|
883
1138
|
"files": [],
|
884
|
-
"folders": {}
|
1139
|
+
"folders": {},
|
885
1140
|
}
|
886
1141
|
current = current["folders"][part]
|
887
|
-
|
1142
|
+
|
888
1143
|
# Add file to current folder
|
889
1144
|
if path_parts: # Handle empty paths
|
890
|
-
current["files"].append(
|
891
|
-
"path": file_desc.file_path,
|
892
|
-
|
893
|
-
|
894
|
-
|
1145
|
+
current["files"].append(
|
1146
|
+
{"path": file_desc.file_path, "description": file_desc.description}
|
1147
|
+
)
|
1148
|
+
|
895
1149
|
# Convert nested dict structure to list format, skipping empty folders
|
896
1150
|
def convert_structure(node):
|
897
1151
|
folders = []
|
@@ -900,108 +1154,112 @@ src/
|
|
900
1154
|
# Only include folders that have files or non-empty subfolders
|
901
1155
|
if converted_folder["files"] or converted_folder["folders"]:
|
902
1156
|
folders.append(converted_folder)
|
903
|
-
|
904
|
-
return {
|
905
|
-
|
906
|
-
"files": node["files"],
|
907
|
-
"folders": folders
|
908
|
-
}
|
909
|
-
|
1157
|
+
|
1158
|
+
return {"path": node["path"], "files": node["files"], "folders": folders}
|
1159
|
+
|
910
1160
|
return convert_structure(root)
|
911
|
-
|
912
1161
|
|
913
|
-
|
914
|
-
|
1162
|
+
async def _handle_get_condensed_overview(
|
1163
|
+
self, arguments: Dict[str, Any]
|
1164
|
+
) -> Dict[str, Any]:
|
915
1165
|
"""Handle get_codebase_overview tool calls for condensed overviews."""
|
916
1166
|
project_id = await self._get_or_create_project_id(arguments)
|
917
|
-
|
1167
|
+
|
918
1168
|
# Try to get existing overview
|
919
1169
|
overview = await self.db_manager.get_project_overview(project_id)
|
920
|
-
|
1170
|
+
|
921
1171
|
if overview:
|
922
1172
|
return {
|
923
1173
|
"overview": overview.overview,
|
924
1174
|
"lastModified": overview.last_modified.isoformat(),
|
925
1175
|
"totalFiles": overview.total_files,
|
926
|
-
"totalTokensInFullDescriptions": overview.total_tokens
|
1176
|
+
"totalTokensInFullDescriptions": overview.total_tokens,
|
927
1177
|
}
|
928
1178
|
else:
|
929
1179
|
return {
|
930
1180
|
"overview": "",
|
931
1181
|
"lastModified": "",
|
932
1182
|
"totalFiles": 0,
|
933
|
-
"totalTokensInFullDescriptions": 0
|
1183
|
+
"totalTokensInFullDescriptions": 0,
|
934
1184
|
}
|
935
|
-
|
936
|
-
async def _handle_update_codebase_overview(
|
1185
|
+
|
1186
|
+
async def _handle_update_codebase_overview(
|
1187
|
+
self, arguments: Dict[str, Any]
|
1188
|
+
) -> Dict[str, Any]:
|
937
1189
|
"""Handle update_codebase_overview tool calls."""
|
938
1190
|
project_id = await self._get_or_create_project_id(arguments)
|
939
|
-
|
940
|
-
|
1191
|
+
|
941
1192
|
# Get current file count and total tokens for context
|
942
1193
|
file_descriptions = await self.db_manager.get_all_file_descriptions(
|
943
1194
|
project_id=project_id
|
944
1195
|
)
|
945
|
-
|
1196
|
+
|
946
1197
|
total_files = len(file_descriptions)
|
947
1198
|
total_tokens = self.token_counter.calculate_codebase_tokens(file_descriptions)
|
948
|
-
|
1199
|
+
|
949
1200
|
# Create overview record
|
950
1201
|
overview = ProjectOverview(
|
951
1202
|
project_id=project_id,
|
952
1203
|
overview=arguments["overview"],
|
953
1204
|
last_modified=datetime.utcnow(),
|
954
1205
|
total_files=total_files,
|
955
|
-
total_tokens=total_tokens
|
1206
|
+
total_tokens=total_tokens,
|
956
1207
|
)
|
957
|
-
|
1208
|
+
|
958
1209
|
await self.db_manager.create_project_overview(overview)
|
959
|
-
|
1210
|
+
|
960
1211
|
return {
|
961
1212
|
"success": True,
|
962
1213
|
"message": f"Overview updated for {total_files} files",
|
963
1214
|
"totalFiles": total_files,
|
964
1215
|
"totalTokens": total_tokens,
|
965
|
-
"overviewLength": len(arguments["overview"])
|
1216
|
+
"overviewLength": len(arguments["overview"]),
|
966
1217
|
}
|
967
|
-
|
968
|
-
async def _handle_get_word_frequency(
|
1218
|
+
|
1219
|
+
async def _handle_get_word_frequency(
|
1220
|
+
self, arguments: Dict[str, Any]
|
1221
|
+
) -> Dict[str, Any]:
|
969
1222
|
"""Handle get_word_frequency tool calls."""
|
970
1223
|
project_id = await self._get_or_create_project_id(arguments)
|
971
1224
|
limit = arguments.get("limit", 200)
|
972
|
-
|
1225
|
+
|
973
1226
|
# Analyze word frequency
|
974
1227
|
result = await self.db_manager.analyze_word_frequency(
|
975
|
-
project_id=project_id,
|
976
|
-
limit=limit
|
1228
|
+
project_id=project_id, limit=limit
|
977
1229
|
)
|
978
|
-
|
1230
|
+
|
979
1231
|
return {
|
980
|
-
"topTerms": [
|
1232
|
+
"topTerms": [
|
1233
|
+
{"term": term.term, "frequency": term.frequency}
|
1234
|
+
for term in result.top_terms
|
1235
|
+
],
|
981
1236
|
"totalTermsAnalyzed": result.total_terms_analyzed,
|
982
|
-
"totalUniqueTerms": result.total_unique_terms
|
1237
|
+
"totalUniqueTerms": result.total_unique_terms,
|
983
1238
|
}
|
984
|
-
|
985
|
-
async def _handle_search_codebase_overview(
|
1239
|
+
|
1240
|
+
async def _handle_search_codebase_overview(
|
1241
|
+
self, arguments: Dict[str, Any]
|
1242
|
+
) -> Dict[str, Any]:
|
986
1243
|
"""Handle search_codebase_overview tool calls."""
|
987
1244
|
project_id = await self._get_or_create_project_id(arguments)
|
988
1245
|
search_word = arguments["searchWord"].lower()
|
989
|
-
|
1246
|
+
|
990
1247
|
# Get the overview
|
991
1248
|
overview = await self.db_manager.get_project_overview(project_id)
|
992
|
-
|
1249
|
+
|
993
1250
|
if not overview or not overview.overview:
|
994
1251
|
return {
|
995
1252
|
"found": False,
|
996
1253
|
"message": "No overview found for this project",
|
997
|
-
"searchWord": arguments["searchWord"]
|
1254
|
+
"searchWord": arguments["searchWord"],
|
998
1255
|
}
|
999
|
-
|
1256
|
+
|
1000
1257
|
# Split overview into sentences
|
1001
1258
|
import re
|
1002
|
-
|
1259
|
+
|
1260
|
+
sentences = re.split(r"[.!?]+", overview.overview)
|
1003
1261
|
sentences = [s.strip() for s in sentences if s.strip()]
|
1004
|
-
|
1262
|
+
|
1005
1263
|
# Find matches
|
1006
1264
|
matches = []
|
1007
1265
|
for i, sentence in enumerate(sentences):
|
@@ -1009,170 +1267,209 @@ src/
                 # Get context: 2 sentences before and after
                 start_idx = max(0, i - 2)
                 end_idx = min(len(sentences), i + 3)
-
+
                 context_sentences = sentences[start_idx:end_idx]
-                context =
-
-                matches.append(
-
-
-
-
-
-
-
+                context = ". ".join(context_sentences) + "."
+
+                matches.append(
+                    {
+                        "matchIndex": i,
+                        "matchSentence": sentence,
+                        "context": context,
+                        "contextStartIndex": start_idx,
+                        "contextEndIndex": end_idx - 1,
+                    }
+                )
+
         return {
             "found": len(matches) > 0,
             "searchWord": arguments["searchWord"],
             "matches": matches,
             "totalMatches": len(matches),
-            "totalSentences": len(sentences)
+            "totalSentences": len(sentences),
         }

-    async def _handle_check_database_health(
+    async def _handle_check_database_health(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
         """
         Handle check_database_health tool calls with comprehensive diagnostics.
-
+
         Returns detailed database health information including retry statistics,
         performance analysis, and resilience indicators.
         """
         # Get comprehensive health diagnostics from the enhanced monitor
-        if
-
+        if (
+            hasattr(self.db_manager, "_health_monitor")
+            and self.db_manager._health_monitor
+        ):
+            comprehensive_diagnostics = (
+                self.db_manager._health_monitor.get_comprehensive_diagnostics()
+            )
         else:
             # Fallback to basic health check if monitor not available
             health_check = await self.db_manager.check_health()
             comprehensive_diagnostics = {
                 "basic_health_check": health_check,
-                "note": "Enhanced health monitoring not available"
+                "note": "Enhanced health monitoring not available",
             }
-
+
         # Get additional database-level statistics
         database_stats = self.db_manager.get_database_stats()
-
+
         return {
             "comprehensive_diagnostics": comprehensive_diagnostics,
             "database_statistics": database_stats,
             "configuration": {
                 **self.db_config,
                 "retry_executor_config": (
-                    self.db_manager._retry_executor.config.__dict__
-                    if hasattr(self.db_manager,
+                    self.db_manager._retry_executor.config.__dict__
+                    if hasattr(self.db_manager, "_retry_executor")
+                    and self.db_manager._retry_executor
                     else {}
-                )
+                ),
             },
             "server_info": {
                 "token_limit": self.token_limit,
                 "db_path": str(self.db_path),
                 "cache_dir": str(self.cache_dir),
                 "health_monitoring_enabled": (
-                    hasattr(self.db_manager,
-                    self.db_manager._health_monitor is not None
-                )
+                    hasattr(self.db_manager, "_health_monitor")
+                    and self.db_manager._health_monitor is not None
+                ),
             },
             "timestamp": datetime.utcnow().isoformat(),
-            "status_summary": self._generate_health_summary(comprehensive_diagnostics)
+            "status_summary": self._generate_health_summary(comprehensive_diagnostics),
         }
-
+
     def _generate_health_summary(self, diagnostics: Dict[str, Any]) -> Dict[str, Any]:
         """Generate a concise health summary from comprehensive diagnostics."""
         if "resilience_indicators" not in diagnostics:
             return {"status": "limited_diagnostics_available"}
-
+
         resilience = diagnostics["resilience_indicators"]
         performance = diagnostics.get("performance_analysis", {})
-
+
         # Overall status based on health score
         health_score = resilience.get("overall_health_score", 0)
         if health_score >= 90:
             status = "excellent"
         elif health_score >= 75:
-            status = "good"
+            status = "good"
         elif health_score >= 50:
             status = "fair"
         else:
             status = "poor"
-
+
         return {
             "overall_status": status,
             "health_score": health_score,
-            "retry_effectiveness": resilience.get("retry_effectiveness", {}).get(
-
-
-
+            "retry_effectiveness": resilience.get("retry_effectiveness", {}).get(
+                "is_effective", False
+            ),
+            "connection_stability": resilience.get("connection_stability", {}).get(
+                "is_stable", False
+            ),
+            "key_recommendations": resilience.get("recommendations", [])[
+                :3
+            ],  # Top 3 recommendations
+            "performance_trend": performance.get("health_check_performance", {}).get(
+                "recent_performance_trend", "unknown"
+            ),
         }
-
-    async def _run_session_with_retry(
+
+    async def _run_session_with_retry(
+        self, read_stream, write_stream, initialization_options
+    ) -> None:
         """Run a single MCP session with error handling and retry logic."""
         max_retries = 3
         base_delay = 1.0  # seconds
-
+
         for attempt in range(max_retries + 1):
             try:
-                logger.info(
-
-                    read_stream,
-                    write_stream,
-                    initialization_options
+                logger.info(
+                    f"Starting MCP server protocol session (attempt {attempt + 1})..."
                 )
+                await self.server.run(read_stream, write_stream, initialization_options)
                 logger.info("MCP server session completed normally")
                 return  # Success, exit retry loop
-
+
             except ValidationError as e:
                 # Handle malformed requests gracefully
-                logger.warning(
-
-
-
-
-
-
-
-
+                logger.warning(
+                    f"Received malformed request (attempt {attempt + 1}): {e}",
+                    extra={
+                        "structured_data": {
+                            "error_type": "ValidationError",
+                            "validation_errors": (
+                                e.errors() if hasattr(e, "errors") else str(e)
+                            ),
+                            "attempt": attempt + 1,
+                            "max_retries": max_retries,
+                        }
+                    },
+                )
+
                 if attempt < max_retries:
-                    delay = base_delay * (2
+                    delay = base_delay * (2**attempt)  # Exponential backoff
                     logger.info(f"Retrying in {delay} seconds...")
                     await asyncio.sleep(delay)
                 else:
-                    logger.error(
+                    logger.error(
+                        "Max retries exceeded for validation errors. Server will "
+                        "continue but this session failed."
+                    )
                     return
-
+
             except (ConnectionError, BrokenPipeError, EOFError) as e:
                 # Handle client disconnection gracefully
                 logger.info(f"Client disconnected: {e}")
                 return
-
+
             except Exception as e:
                 # Handle other exceptions with full logging
                 import traceback
-
+
+                if "unhandled errors in a TaskGroup" in str(
+                    e
+                ) and "ValidationError" in str(e):
                     # This is likely a ValidationError wrapped in a TaskGroup exception
-                    logger.warning(
-
-
-
-
-
-
-
-
-
+                    logger.warning(
+                        f"Detected wrapped validation error "
+                        f"(attempt {attempt + 1}): {e}",
+                        extra={
+                            "structured_data": {
+                                "error_type": type(e).__name__,
+                                "error_message": str(e),
+                                "attempt": attempt + 1,
+                                "max_retries": max_retries,
+                                "likely_validation_error": True,
+                            }
+                        },
+                    )
+
                     if attempt < max_retries:
-                        delay = base_delay * (2
+                        delay = base_delay * (2**attempt)
                         logger.info(f"Retrying in {delay} seconds...")
                         await asyncio.sleep(delay)
                     else:
-                        logger.error(
+                        logger.error(
+                            "Max retries exceeded for validation errors. Server will "
+                            "continue but this session failed."
+                        )
                         return
                 else:
                     # This is a genuine error, log and re-raise
-                    logger.error(
-
-
-
-
-
+                    logger.error(
+                        f"MCP server session error: {e}",
+                        extra={
+                            "structured_data": {
+                                "error_type": type(e).__name__,
+                                "error_message": str(e),
+                                "traceback": traceback.format_exc(),
+                            }
+                        },
+                    )
                     raise

     async def run(self) -> None:
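Note on the retry logic above: `_run_session_with_retry` bounds its retries and doubles the wait between attempts (`base_delay * (2**attempt)`), and for validation errors it logs the final failure and returns rather than raising, so a single malformed client request cannot take the whole server down. A minimal standalone sketch of that backoff pattern, using hypothetical names rather than anything from this package:

import asyncio
import logging

logger = logging.getLogger(__name__)


async def run_with_backoff(operation, max_retries: int = 3, base_delay: float = 1.0):
    """Retry an async callable with exponential backoff (illustrative sketch only)."""
    for attempt in range(max_retries + 1):
        try:
            return await operation()
        except ValueError as exc:  # stand-in for whatever error class is retryable
            if attempt < max_retries:
                delay = base_delay * (2**attempt)  # 1s, 2s, 4s, ...
                logger.info("Attempt %d failed (%s); retrying in %.1fs", attempt + 1, exc, delay)
                await asyncio.sleep(delay)
            else:
                logger.error("Max retries exceeded; giving up")
                raise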
@@ -1180,80 +1477,99 @@ src/
         logger.info("Starting server initialization...")
         await self.initialize()
         logger.info("Server initialization completed, starting MCP protocol...")
-
+
         max_retries = 5
         base_delay = 2.0  # seconds
-
+
         for attempt in range(max_retries + 1):
             try:
                 async with stdio_server() as (read_stream, write_stream):
-                    logger.info(
+                    logger.info(
+                        f"stdio_server context established (attempt {attempt + 1})"
+                    )
                     initialization_options = self.server.create_initialization_options()
                     logger.debug(f"Initialization options: {initialization_options}")
-
-                    await self._run_session_with_retry(
+
+                    await self._run_session_with_retry(
+                        read_stream, write_stream, initialization_options
+                    )
                     return  # Success, exit retry loop
-
+
             except KeyboardInterrupt:
                 logger.info("Server stopped by user interrupt")
                 return
-
+
             except Exception as e:
                 import traceback
-
+
                 # Check if this is a wrapped validation error
                 error_str = str(e)
                 is_validation_error = (
-                    "ValidationError" in error_str
-                    "Field required" in error_str
-                    "Input should be" in error_str
-                    "pydantic_core._pydantic_core.ValidationError" in error_str
+                    "ValidationError" in error_str
+                    or "Field required" in error_str
+                    or "Input should be" in error_str
+                    or "pydantic_core._pydantic_core.ValidationError" in error_str
                 )
-
+
                 if is_validation_error:
-                    logger.warning(
-
-
-
-
-
-
-
-
-
+                    logger.warning(
+                        f"Detected validation error in session "
+                        f"(attempt {attempt + 1}): Malformed client request",
+                        extra={
+                            "structured_data": {
+                                "error_type": "ValidationError",
+                                "error_message": (
+                                    "Client sent malformed request "
+                                    "(likely missing clientInfo)"
+                                ),
+                                "attempt": attempt + 1,
+                                "max_retries": max_retries,
+                                "will_retry": attempt < max_retries,
+                            }
+                        },
+                    )
+
                     if attempt < max_retries:
-                        delay = base_delay * (
+                        delay = base_delay * (
+                            2 ** min(attempt, 3)
+                        )  # Cap exponential growth
                         logger.info(f"Retrying server in {delay} seconds...")
                         await asyncio.sleep(delay)
                         continue
                     else:
-                        logger.warning(
+                        logger.warning(
+                            "Max retries exceeded for validation errors. Server is "
+                            "robust against malformed requests."
+                        )
                         return
                 else:
                     # This is a genuine fatal error
-                    logger.error(
-
-
-
-
-
+                    logger.error(
+                        f"Fatal server error: {e}",
+                        extra={
+                            "structured_data": {
+                                "error_type": type(e).__name__,
+                                "error_message": str(e),
+                                "traceback": traceback.format_exc(),
+                            }
+                        },
+                    )
                     raise
-
+
         # Clean shutdown
         await self.shutdown()
-
+
     async def shutdown(self) -> None:
         """Clean shutdown of server resources."""
         try:
             # Cancel any running tasks
             self.task_manager.cancel_all()
-
+
             # Close database connections
             await self.db_manager.close_pool()
-
+
             self.logger.info("Server shutdown completed successfully")
-
+
         except Exception as e:
             self.error_handler.log_error(e, context={"phase": "shutdown"})

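The `run()` hunk above decides whether an exception is a wrapped pydantic validation error purely by substring checks on `str(e)`, using the four markers visible in `is_validation_error`. The same heuristic, pulled out as a hypothetical helper for illustration (not part of the package):

def looks_like_validation_error(exc: Exception) -> bool:
    """Heuristic: does this exception text resemble a wrapped pydantic ValidationError?"""
    text = str(exc)
    markers = (
        "ValidationError",
        "Field required",
        "Input should be",
        "pydantic_core._pydantic_core.ValidationError",
    )
    return any(marker in text for marker in markers)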
@@ -1261,14 +1577,14 @@ src/
 async def main():
     """Main entry point for the MCP server."""
     import sys
-
+
     # Setup logging to stderr (stdout is used for MCP communication)
     logging.basicConfig(
         level=logging.INFO,
         format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-        handlers=[logging.StreamHandler(sys.stderr)]
+        handlers=[logging.StreamHandler(sys.stderr)],
     )
-
+
     # Create and run server
     server = MCPCodeIndexServer()
     await server.run()
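The final hunk only adds a trailing comma in `main()`, but it highlights the stdio constraint the comment states: stdout carries MCP protocol traffic, so all logging is routed to stderr. One plausible way to invoke this entry point directly (an assumption for illustration; the installed console script may wire it up differently):

import asyncio

from mcp_code_indexer.server.mcp_server import main

if __name__ == "__main__":
    # main() configures stderr logging itself, then creates and runs the server.
    asyncio.run(main())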