mcp-code-indexer 1.1.3__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_code_indexer/__main__.py +11 -0
- mcp_code_indexer/data/stop_words_english.txt +851 -0
- mcp_code_indexer/database/database.py +155 -1
- mcp_code_indexer/database/models.py +35 -0
- mcp_code_indexer/main.py +265 -0
- mcp_code_indexer/server/mcp_server.py +264 -36
- {mcp_code_indexer-1.1.3.dist-info → mcp_code_indexer-1.2.0.dist-info}/METADATA +7 -4
- {mcp_code_indexer-1.1.3.dist-info → mcp_code_indexer-1.2.0.dist-info}/RECORD +12 -10
- {mcp_code_indexer-1.1.3.dist-info → mcp_code_indexer-1.2.0.dist-info}/WHEEL +0 -0
- {mcp_code_indexer-1.1.3.dist-info → mcp_code_indexer-1.2.0.dist-info}/entry_points.txt +0 -0
- {mcp_code_indexer-1.1.3.dist-info → mcp_code_indexer-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {mcp_code_indexer-1.1.3.dist-info → mcp_code_indexer-1.2.0.dist-info}/top_level.txt +0 -0
@@ -7,8 +7,10 @@ for file description management tools.
|
|
7
7
|
|
8
8
|
import asyncio
|
9
9
|
import hashlib
|
10
|
+
import html
|
10
11
|
import json
|
11
12
|
import logging
|
13
|
+
import re
|
12
14
|
import uuid
|
13
15
|
from datetime import datetime
|
14
16
|
from pathlib import Path
|
@@ -24,7 +26,8 @@ from mcp_code_indexer.file_scanner import FileScanner
|
|
24
26
|
from mcp_code_indexer.token_counter import TokenCounter
|
25
27
|
from mcp_code_indexer.database.models import (
|
26
28
|
Project, FileDescription, CodebaseOverview, SearchResult,
|
27
|
-
CodebaseSizeInfo, FolderNode, FileNode
|
29
|
+
CodebaseSizeInfo, FolderNode, FileNode, ProjectOverview,
|
30
|
+
WordFrequencyResult
|
28
31
|
)
|
29
32
|
from mcp_code_indexer.error_handler import setup_error_handling, ErrorHandler
|
30
33
|
from mcp_code_indexer.middleware.error_middleware import create_tool_middleware, AsyncTaskManager
|
@@ -85,6 +88,102 @@ class MCPCodeIndexServer:
|
|
85
88
|
extra={"structured_data": {"initialization": {"token_limit": token_limit}}}
|
86
89
|
)
|
87
90
|
|
91
|
+
def _clean_html_entities(self, text: str) -> str:
|
92
|
+
"""
|
93
|
+
Clean HTML entities from text to prevent encoding issues.
|
94
|
+
|
95
|
+
Args:
|
96
|
+
text: Text that may contain HTML entities
|
97
|
+
|
98
|
+
Returns:
|
99
|
+
Text with HTML entities decoded to proper characters
|
100
|
+
"""
|
101
|
+
if not text:
|
102
|
+
return text
|
103
|
+
|
104
|
+
# Decode HTML entities like &lt; &gt; &amp; etc.
|
105
|
+
return html.unescape(text)
|
106
|
+
|
107
|
+
def _clean_arguments(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
|
108
|
+
"""
|
109
|
+
Clean HTML entities from all text arguments.
|
110
|
+
|
111
|
+
Args:
|
112
|
+
arguments: Dictionary of arguments to clean
|
113
|
+
|
114
|
+
Returns:
|
115
|
+
Dictionary with HTML entities decoded in all string values
|
116
|
+
"""
|
117
|
+
cleaned = {}
|
118
|
+
|
119
|
+
for key, value in arguments.items():
|
120
|
+
if isinstance(value, str):
|
121
|
+
cleaned[key] = self._clean_html_entities(value)
|
122
|
+
elif isinstance(value, list):
|
123
|
+
# Clean strings in lists (like conflict resolutions)
|
124
|
+
cleaned[key] = [
|
125
|
+
self._clean_html_entities(item) if isinstance(item, str) else item
|
126
|
+
for item in value
|
127
|
+
]
|
128
|
+
elif isinstance(value, dict):
|
129
|
+
# Recursively clean nested dictionaries
|
130
|
+
cleaned[key] = self._clean_arguments(value)
|
131
|
+
else:
|
132
|
+
# Pass through other types unchanged
|
133
|
+
cleaned[key] = value
|
134
|
+
|
135
|
+
return cleaned
|
136
|
+
|
137
|
+
def _parse_json_robust(self, json_str: str) -> Dict[str, Any]:
|
138
|
+
"""
|
139
|
+
Parse JSON with automatic repair for common issues.
|
140
|
+
|
141
|
+
Args:
|
142
|
+
json_str: JSON string that may have formatting issues
|
143
|
+
|
144
|
+
Returns:
|
145
|
+
Parsed JSON dictionary
|
146
|
+
|
147
|
+
Raises:
|
148
|
+
ValueError: If JSON cannot be parsed even after repair attempts
|
149
|
+
"""
|
150
|
+
# First try normal parsing
|
151
|
+
try:
|
152
|
+
return json.loads(json_str)
|
153
|
+
except json.JSONDecodeError as original_error:
|
154
|
+
logger.warning(f"Initial JSON parse failed: {original_error}")
|
155
|
+
|
156
|
+
# Try to repair common issues
|
157
|
+
repaired = json_str
|
158
|
+
|
159
|
+
# Fix 1: Quote unquoted URLs and paths
|
160
|
+
# Look for patterns like: "key": http://... or "key": /path/...
|
161
|
+
url_pattern = r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9+.-]*://[^\s,}]+|/[^\s,}]*)'
|
162
|
+
repaired = re.sub(url_pattern, r'\1: "\2"', repaired)
|
163
|
+
|
164
|
+
# Fix 2: Quote unquoted boolean-like strings
|
165
|
+
# Look for: "key": true-ish-string or "key": false-ish-string
|
166
|
+
bool_pattern = r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9_-]*[a-zA-Z0-9])(?=\s*[,}])'
|
167
|
+
repaired = re.sub(bool_pattern, r'\1: "\2"', repaired)
|
168
|
+
|
169
|
+
# Fix 3: Remove trailing commas
|
170
|
+
repaired = re.sub(r',(\s*[}\]])', r'\1', repaired)
|
171
|
+
|
172
|
+
# Fix 4: Ensure proper string quoting for common unquoted values
|
173
|
+
# Handle cases like: "key": value (where value should be "value")
|
174
|
+
unquoted_pattern = r'("[\w]+"):\s*([a-zA-Z0-9_-]+)(?=\s*[,}])'
|
175
|
+
repaired = re.sub(unquoted_pattern, r'\1: "\2"', repaired)
|
176
|
+
|
177
|
+
try:
|
178
|
+
result = json.loads(repaired)
|
179
|
+
logger.info(f"Successfully repaired JSON. Original: {json_str[:100]}...")
|
180
|
+
logger.info(f"Repaired: {repaired[:100]}...")
|
181
|
+
return result
|
182
|
+
except json.JSONDecodeError as repair_error:
|
183
|
+
logger.error(f"JSON repair failed. Original: {json_str}")
|
184
|
+
logger.error(f"Repaired attempt: {repaired}")
|
185
|
+
raise ValueError(f"Could not parse JSON even after repair attempts. Original error: {original_error}, Repair error: {repair_error}")
|
186
|
+
|
88
187
|
async def initialize(self) -> None:
|
89
188
|
"""Initialize database and other resources."""
|
90
189
|
await self.db_manager.initialize()
|
@@ -116,11 +215,11 @@ class MCPCodeIndexServer:
|
|
116
215
|
"description": "Git branch name (e.g., 'main', 'develop')"
|
117
216
|
},
|
118
217
|
"remoteOrigin": {
|
119
|
-
"type":
|
218
|
+
"type": "string",
|
120
219
|
"description": "Git remote origin URL if available"
|
121
220
|
},
|
122
221
|
"upstreamOrigin": {
|
123
|
-
"type":
|
222
|
+
"type": "string",
|
124
223
|
"description": "Upstream repository URL if this is a fork"
|
125
224
|
},
|
126
225
|
"filePath": {
|
@@ -140,11 +239,11 @@ class MCPCodeIndexServer:
|
|
140
239
|
"projectName": {"type": "string", "description": "The name of the project"},
|
141
240
|
"folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
|
142
241
|
"branch": {"type": "string", "description": "Git branch name"},
|
143
|
-
"remoteOrigin": {"type":
|
144
|
-
"upstreamOrigin": {"type":
|
242
|
+
"remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
|
243
|
+
"upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
|
145
244
|
"filePath": {"type": "string", "description": "Relative path to the file from project root"},
|
146
245
|
"description": {"type": "string", "description": "Detailed description of the file's contents"},
|
147
|
-
"fileHash": {"type":
|
246
|
+
"fileHash": {"type": "string", "description": "SHA-256 hash of the file contents (optional)"}
|
148
247
|
},
|
149
248
|
"required": ["projectName", "folderPath", "branch", "filePath", "description"]
|
150
249
|
}
|
@@ -158,8 +257,8 @@ class MCPCodeIndexServer:
|
|
158
257
|
"projectName": {"type": "string", "description": "The name of the project"},
|
159
258
|
"folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
|
160
259
|
"branch": {"type": "string", "description": "Git branch name"},
|
161
|
-
"remoteOrigin": {"type":
|
162
|
-
"upstreamOrigin": {"type":
|
260
|
+
"remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
|
261
|
+
"upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
|
163
262
|
"tokenLimit": {"type": "integer", "description": "Optional token limit override (defaults to server configuration)"}
|
164
263
|
},
|
165
264
|
"required": ["projectName", "folderPath", "branch"]
|
@@ -174,8 +273,8 @@ class MCPCodeIndexServer:
|
|
174
273
|
"projectName": {"type": "string", "description": "The name of the project"},
|
175
274
|
"folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
|
176
275
|
"branch": {"type": "string", "description": "Git branch name"},
|
177
|
-
"remoteOrigin": {"type":
|
178
|
-
"upstreamOrigin": {"type":
|
276
|
+
"remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
|
277
|
+
"upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
|
179
278
|
"limit": {"type": "integer", "description": "Maximum number of missing files to return (optional)"}
|
180
279
|
},
|
181
280
|
"required": ["projectName", "folderPath", "branch"]
|
@@ -190,8 +289,8 @@ class MCPCodeIndexServer:
|
|
190
289
|
"projectName": {"type": "string", "description": "The name of the project"},
|
191
290
|
"folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
|
192
291
|
"branch": {"type": "string", "description": "Git branch to search in"},
|
193
|
-
"remoteOrigin": {"type":
|
194
|
-
"upstreamOrigin": {"type":
|
292
|
+
"remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
|
293
|
+
"upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
|
195
294
|
"query": {"type": "string", "description": "Search query (e.g., 'authentication middleware', 'database models')"},
|
196
295
|
"maxResults": {"type": "integer", "default": 20, "description": "Maximum number of results to return"}
|
197
296
|
},
|
@@ -199,16 +298,16 @@ class MCPCodeIndexServer:
|
|
199
298
|
}
|
200
299
|
),
|
201
300
|
types.Tool(
|
202
|
-
name="
|
203
|
-
description="Returns the complete file
|
301
|
+
name="get_all_descriptions",
|
302
|
+
description="Returns the complete file-by-file structure of a codebase with individual descriptions for each file. For large codebases, consider using get_codebase_overview for a condensed summary instead.",
|
204
303
|
inputSchema={
|
205
304
|
"type": "object",
|
206
305
|
"properties": {
|
207
306
|
"projectName": {"type": "string", "description": "The name of the project"},
|
208
307
|
"folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
|
209
308
|
"branch": {"type": "string", "description": "Git branch name"},
|
210
|
-
"remoteOrigin": {"type":
|
211
|
-
"upstreamOrigin": {"type":
|
309
|
+
"remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
|
310
|
+
"upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"}
|
212
311
|
},
|
213
312
|
"required": ["projectName", "folderPath", "branch"]
|
214
313
|
}
|
@@ -221,8 +320,8 @@ class MCPCodeIndexServer:
|
|
221
320
|
"properties": {
|
222
321
|
"projectName": {"type": "string", "description": "The name of the project"},
|
223
322
|
"folderPath": {"type": "string", "description": "Absolute path to the project folder"},
|
224
|
-
"remoteOrigin": {"type":
|
225
|
-
"upstreamOrigin": {"type":
|
323
|
+
"remoteOrigin": {"type": "string", "description": "Git remote origin URL"},
|
324
|
+
"upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
|
226
325
|
"sourceBranch": {"type": "string", "description": "Branch to merge from (e.g., 'feature/new-ui')"},
|
227
326
|
"targetBranch": {"type": "string", "description": "Branch to merge into (e.g., 'main')"},
|
228
327
|
"conflictResolutions": {
|
@@ -240,6 +339,53 @@ class MCPCodeIndexServer:
|
|
240
339
|
},
|
241
340
|
"required": ["projectName", "folderPath", "sourceBranch", "targetBranch"]
|
242
341
|
}
|
342
|
+
),
|
343
|
+
types.Tool(
|
344
|
+
name="get_codebase_overview",
|
345
|
+
description="Returns a condensed, interpretive overview of the entire codebase. This is a single comprehensive narrative that captures the architecture, key components, relationships, and design patterns. Unlike get_all_descriptions which lists every file, this provides a holistic view suitable for understanding the codebase's structure and purpose. If no overview exists, returns empty string.",
|
346
|
+
inputSchema={
|
347
|
+
"type": "object",
|
348
|
+
"properties": {
|
349
|
+
"projectName": {"type": "string", "description": "The name of the project"},
|
350
|
+
"folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
|
351
|
+
"branch": {"type": "string", "description": "Git branch name"},
|
352
|
+
"remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
|
353
|
+
"upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"}
|
354
|
+
},
|
355
|
+
"required": ["projectName", "folderPath", "branch"]
|
356
|
+
}
|
357
|
+
),
|
358
|
+
types.Tool(
|
359
|
+
name="update_codebase_overview",
|
360
|
+
description="Updates the condensed codebase overview. Create a comprehensive narrative that would help a new developer understand this codebase. Include: (1) A visual directory tree showing the main folders and their purposes, (2) Overall architecture - how components fit together, (3) Core business logic and main workflows, (4) Key technical patterns and conventions used, (5) Important dependencies and integrations, (6) Database schema overview if applicable, (7) API structure if applicable, (8) Testing approach, (9) Build and deployment notes. Write in a clear, structured format with headers and sections. Be thorough but organized - imagine writing a technical onboarding document. The overview should be substantial (think 10-20 pages of text) but well-structured so specific sections can be found easily.",
|
361
|
+
inputSchema={
|
362
|
+
"type": "object",
|
363
|
+
"properties": {
|
364
|
+
"projectName": {"type": "string", "description": "The name of the project"},
|
365
|
+
"folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
|
366
|
+
"branch": {"type": "string", "description": "Git branch name"},
|
367
|
+
"remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
|
368
|
+
"upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
|
369
|
+
"overview": {"type": "string", "description": "Comprehensive narrative overview of the codebase (10-30k tokens recommended)"}
|
370
|
+
},
|
371
|
+
"required": ["projectName", "folderPath", "branch", "overview"]
|
372
|
+
}
|
373
|
+
),
|
374
|
+
types.Tool(
|
375
|
+
name="get_word_frequency",
|
376
|
+
description="Analyzes all file descriptions to find the most frequently used technical terms. Filters out common English stop words and symbols, returning the top 200 meaningful terms. Useful for understanding the codebase's domain vocabulary and finding all functions/files related to specific concepts.",
|
377
|
+
inputSchema={
|
378
|
+
"type": "object",
|
379
|
+
"properties": {
|
380
|
+
"projectName": {"type": "string", "description": "The name of the project"},
|
381
|
+
"folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
|
382
|
+
"branch": {"type": "string", "description": "Git branch name"},
|
383
|
+
"remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
|
384
|
+
"upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
|
385
|
+
"limit": {"type": "integer", "default": 200, "description": "Number of top terms to return"}
|
386
|
+
},
|
387
|
+
"required": ["projectName", "folderPath", "branch"]
|
388
|
+
}
|
243
389
|
)
|
244
390
|
]
|
245
391
|
|
@@ -253,7 +399,10 @@ class MCPCodeIndexServer:
|
|
253
399
|
"check_codebase_size": self._handle_check_codebase_size,
|
254
400
|
"find_missing_descriptions": self._handle_find_missing_descriptions,
|
255
401
|
"search_descriptions": self._handle_search_descriptions,
|
256
|
-
"
|
402
|
+
"get_all_descriptions": self._handle_get_codebase_overview,
|
403
|
+
"get_codebase_overview": self._handle_get_condensed_overview,
|
404
|
+
"update_codebase_overview": self._handle_update_codebase_overview,
|
405
|
+
"get_word_frequency": self._handle_get_word_frequency,
|
257
406
|
"merge_branch_descriptions": self._handle_merge_branch_descriptions,
|
258
407
|
}
|
259
408
|
|
@@ -270,7 +419,10 @@ class MCPCodeIndexServer:
|
|
270
419
|
|
271
420
|
async def _execute_tool_handler(self, handler, arguments: Dict[str, Any]) -> List[types.TextContent]:
|
272
421
|
"""Execute a tool handler and format the result."""
|
273
|
-
|
422
|
+
# Clean HTML entities from all arguments before processing
|
423
|
+
cleaned_arguments = self._clean_arguments(arguments)
|
424
|
+
|
425
|
+
result = await handler(cleaned_arguments)
|
274
426
|
|
275
427
|
return [types.TextContent(
|
276
428
|
type="text",
|
@@ -410,8 +562,8 @@ class MCPCodeIndexServer:
|
|
410
562
|
if not scanner.is_valid_project_directory():
|
411
563
|
return False
|
412
564
|
|
413
|
-
current_files = scanner.
|
414
|
-
current_basenames = {
|
565
|
+
current_files = scanner.scan_directory()
|
566
|
+
current_basenames = {f.name for f in current_files}
|
415
567
|
|
416
568
|
if not current_basenames:
|
417
569
|
return False
|
@@ -578,8 +730,16 @@ class MCPCodeIndexServer:
|
|
578
730
|
"""Handle check_codebase_size tool calls."""
|
579
731
|
project_id = await self._get_or_create_project_id(arguments)
|
580
732
|
resolved_branch = await self._resolve_branch(project_id, arguments["branch"])
|
733
|
+
folder_path = Path(arguments["folderPath"])
|
581
734
|
|
582
|
-
#
|
735
|
+
# Clean up descriptions for files that no longer exist
|
736
|
+
cleaned_up_files = await self.db_manager.cleanup_missing_files(
|
737
|
+
project_id=project_id,
|
738
|
+
branch=resolved_branch,
|
739
|
+
project_root=folder_path
|
740
|
+
)
|
741
|
+
|
742
|
+
# Get file descriptions for this project/branch (after cleanup)
|
583
743
|
file_descriptions = await self.db_manager.get_all_file_descriptions(
|
584
744
|
project_id=project_id,
|
585
745
|
branch=resolved_branch
|
@@ -598,7 +758,9 @@ class MCPCodeIndexServer:
|
|
598
758
|
"isLarge": is_large,
|
599
759
|
"recommendation": recommendation,
|
600
760
|
"tokenLimit": token_limit,
|
601
|
-
"totalFiles": len(file_descriptions)
|
761
|
+
"totalFiles": len(file_descriptions),
|
762
|
+
"cleanedUpFiles": cleaned_up_files,
|
763
|
+
"cleanedUpCount": len(cleaned_up_files)
|
602
764
|
}
|
603
765
|
|
604
766
|
async def _handle_find_missing_descriptions(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
|
@@ -686,18 +848,7 @@ class MCPCodeIndexServer:
|
|
686
848
|
total_tokens = self.token_counter.calculate_codebase_tokens(file_descriptions)
|
687
849
|
is_large = self.token_counter.is_large_codebase(total_tokens)
|
688
850
|
|
689
|
-
#
|
690
|
-
if is_large:
|
691
|
-
return {
|
692
|
-
"isLarge": True,
|
693
|
-
"totalTokens": total_tokens,
|
694
|
-
"tokenLimit": self.token_counter.token_limit,
|
695
|
-
"totalFiles": len(file_descriptions),
|
696
|
-
"recommendation": "use_search",
|
697
|
-
"message": f"Codebase has {total_tokens} tokens (limit: {self.token_counter.token_limit}). Use search_descriptions instead for better performance."
|
698
|
-
}
|
699
|
-
|
700
|
-
# Build folder structure
|
851
|
+
# Always build and return the folder structure - if the AI called this tool, it wants the overview
|
701
852
|
structure = self._build_folder_structure(file_descriptions)
|
702
853
|
|
703
854
|
return {
|
@@ -814,6 +965,83 @@ class MCPCodeIndexServer:
|
|
814
965
|
**result
|
815
966
|
}
|
816
967
|
|
968
|
+
async def _handle_get_condensed_overview(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
|
969
|
+
"""Handle get_codebase_overview tool calls for condensed overviews."""
|
970
|
+
project_id = await self._get_or_create_project_id(arguments)
|
971
|
+
resolved_branch = await self._resolve_branch(project_id, arguments["branch"])
|
972
|
+
|
973
|
+
# Try to get existing overview
|
974
|
+
overview = await self.db_manager.get_project_overview(project_id, resolved_branch)
|
975
|
+
|
976
|
+
if overview:
|
977
|
+
return {
|
978
|
+
"overview": overview.overview,
|
979
|
+
"lastModified": overview.last_modified.isoformat(),
|
980
|
+
"totalFiles": overview.total_files,
|
981
|
+
"totalTokensInFullDescriptions": overview.total_tokens
|
982
|
+
}
|
983
|
+
else:
|
984
|
+
return {
|
985
|
+
"overview": "",
|
986
|
+
"lastModified": "",
|
987
|
+
"totalFiles": 0,
|
988
|
+
"totalTokensInFullDescriptions": 0
|
989
|
+
}
|
990
|
+
|
991
|
+
async def _handle_update_codebase_overview(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
|
992
|
+
"""Handle update_codebase_overview tool calls."""
|
993
|
+
project_id = await self._get_or_create_project_id(arguments)
|
994
|
+
resolved_branch = await self._resolve_branch(project_id, arguments["branch"])
|
995
|
+
folder_path = Path(arguments["folderPath"])
|
996
|
+
|
997
|
+
# Get current file count and total tokens for context
|
998
|
+
file_descriptions = await self.db_manager.get_all_file_descriptions(
|
999
|
+
project_id=project_id,
|
1000
|
+
branch=resolved_branch
|
1001
|
+
)
|
1002
|
+
|
1003
|
+
total_files = len(file_descriptions)
|
1004
|
+
total_tokens = self.token_counter.calculate_codebase_tokens(file_descriptions)
|
1005
|
+
|
1006
|
+
# Create overview record
|
1007
|
+
overview = ProjectOverview(
|
1008
|
+
project_id=project_id,
|
1009
|
+
branch=resolved_branch,
|
1010
|
+
overview=arguments["overview"],
|
1011
|
+
last_modified=datetime.utcnow(),
|
1012
|
+
total_files=total_files,
|
1013
|
+
total_tokens=total_tokens
|
1014
|
+
)
|
1015
|
+
|
1016
|
+
await self.db_manager.create_project_overview(overview)
|
1017
|
+
|
1018
|
+
return {
|
1019
|
+
"success": True,
|
1020
|
+
"message": f"Overview updated for {total_files} files",
|
1021
|
+
"totalFiles": total_files,
|
1022
|
+
"totalTokens": total_tokens,
|
1023
|
+
"overviewLength": len(arguments["overview"])
|
1024
|
+
}
|
1025
|
+
|
1026
|
+
async def _handle_get_word_frequency(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
|
1027
|
+
"""Handle get_word_frequency tool calls."""
|
1028
|
+
project_id = await self._get_or_create_project_id(arguments)
|
1029
|
+
resolved_branch = await self._resolve_branch(project_id, arguments["branch"])
|
1030
|
+
limit = arguments.get("limit", 200)
|
1031
|
+
|
1032
|
+
# Analyze word frequency
|
1033
|
+
result = await self.db_manager.analyze_word_frequency(
|
1034
|
+
project_id=project_id,
|
1035
|
+
branch=resolved_branch,
|
1036
|
+
limit=limit
|
1037
|
+
)
|
1038
|
+
|
1039
|
+
return {
|
1040
|
+
"topTerms": [{"term": term.term, "frequency": term.frequency} for term in result.top_terms],
|
1041
|
+
"totalTermsAnalyzed": result.total_terms_analyzed,
|
1042
|
+
"totalUniqueTerms": result.total_unique_terms
|
1043
|
+
}
|
1044
|
+
|
817
1045
|
async def _run_session_with_retry(self, read_stream, write_stream, initialization_options) -> None:
|
818
1046
|
"""Run a single MCP session with error handling and retry logic."""
|
819
1047
|
max_retries = 3
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: mcp-code-indexer
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.2.0
|
4
4
|
Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
|
5
5
|
Author: MCP Code Indexer Contributors
|
6
6
|
Maintainer: MCP Code Indexer Contributors
|
@@ -158,12 +158,12 @@ mypy src/
|
|
158
158
|
|
159
159
|
## 🛠️ MCP Tools Available
|
160
160
|
|
161
|
-
The server provides **
|
161
|
+
The server provides **11 powerful MCP tools** for intelligent codebase management:
|
162
162
|
|
163
163
|
### Core Operations
|
164
164
|
- **`get_file_description`** - Retrieve stored file descriptions instantly
|
165
165
|
- **`update_file_description`** - Store detailed file summaries and metadata
|
166
|
-
- **`check_codebase_size`** - Get token count and size-based recommendations
|
166
|
+
- **`check_codebase_size`** - Get token count and size-based recommendations with automatic file cleanup
|
167
167
|
|
168
168
|
### Batch Operations
|
169
169
|
- **`find_missing_descriptions`** - Scan projects for files without descriptions
|
@@ -171,10 +171,13 @@ The server provides **8 powerful MCP tools** for intelligent codebase management
|
|
171
171
|
|
172
172
|
### Search & Discovery
|
173
173
|
- **`search_descriptions`** - Fast full-text search across all descriptions
|
174
|
-
- **`
|
174
|
+
- **`get_all_descriptions`** - Complete hierarchical project structure
|
175
|
+
- **`get_codebase_overview`** - Condensed narrative overview of entire codebase
|
176
|
+
- **`get_word_frequency`** - Technical vocabulary analysis with stop-word filtering
|
175
177
|
|
176
178
|
### Advanced Features
|
177
179
|
- **`merge_branch_descriptions`** - Two-phase merge with conflict resolution
|
180
|
+
- **`update_codebase_overview`** - Create comprehensive codebase documentation
|
178
181
|
|
179
182
|
## 🏗️ Architecture Highlights
|
180
183
|
|
@@ -1,22 +1,24 @@
|
|
1
1
|
mcp_code_indexer/__init__.py,sha256=PUkiM7VGRk7n2B_Ma0fzZWC0wmHCjyE15wxsvU9I54E,473
|
2
|
+
mcp_code_indexer/__main__.py,sha256=4Edinoe0ug43hobuLYcjTmGp2YJnlFYN4_8iKvUBJ0Q,213
|
2
3
|
mcp_code_indexer/error_handler.py,sha256=cNSUFFrGBMLDv4qa78c7495L1wSl_dXCRbzCJOidx-Q,11590
|
3
4
|
mcp_code_indexer/file_scanner.py,sha256=ctXeZMROgDThEtjzsANTK9TbK-fhTScMBd4iyuleBT4,11734
|
4
5
|
mcp_code_indexer/logging_config.py,sha256=5L1cYIG8IAX91yCjc5pzkbO_KPt0bvm_ABHB53LBZjI,5184
|
5
|
-
mcp_code_indexer/main.py,sha256=
|
6
|
+
mcp_code_indexer/main.py,sha256=eRc0Vl3DVDGS5XtuPCDBArgmqcBIi92O97LbE8HYGGA,13601
|
6
7
|
mcp_code_indexer/merge_handler.py,sha256=lJR8eVq2qSrF6MW9mR3Fy8UzrNAaQ7RsI2FMNXne3vQ,14692
|
7
8
|
mcp_code_indexer/token_counter.py,sha256=WrifOkbF99nWWHlRlhCHAB2KN7qr83GOHl7apE-hJcE,8460
|
9
|
+
mcp_code_indexer/data/stop_words_english.txt,sha256=7Zdd9ameVgA6tN_zuXROvHXD4hkWeELVywPhb7FJEkw,6343
|
8
10
|
mcp_code_indexer/database/__init__.py,sha256=aPq_aaRp0aSwOBIq9GkuMNjmLxA411zg2vhdrAuHm-w,38
|
9
|
-
mcp_code_indexer/database/database.py,sha256=
|
10
|
-
mcp_code_indexer/database/models.py,sha256=
|
11
|
+
mcp_code_indexer/database/database.py,sha256=ziePr0QHkPwv-plLRdySB8ei8fcXc3SOIgC0uRi47KI,26600
|
12
|
+
mcp_code_indexer/database/models.py,sha256=_vCmJnPXZSiInRzyvs4c7QUWuNNW8qsOoDlGX8J-Gnk,7124
|
11
13
|
mcp_code_indexer/middleware/__init__.py,sha256=p-mP0pMsfiU2yajCPvokCUxUEkh_lu4XJP1LyyMW2ug,220
|
12
14
|
mcp_code_indexer/middleware/error_middleware.py,sha256=v6jaHmPxf3qerYdb85X1tHIXLxgcbybpitKVakFLQTA,10109
|
13
15
|
mcp_code_indexer/server/__init__.py,sha256=16xMcuriUOBlawRqWNBk6niwrvtv_JD5xvI36X1Vsmk,41
|
14
|
-
mcp_code_indexer/server/mcp_server.py,sha256=
|
16
|
+
mcp_code_indexer/server/mcp_server.py,sha256=leUVWUJqbQomu7VO7uH80FHCx40EC7ba01pbx79aZTc,58664
|
15
17
|
mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4,sha256=Ijkht27pm96ZW3_3OFE-7xAPtR0YyTWXoRO8_-hlsqc,1681126
|
16
18
|
mcp_code_indexer/tools/__init__.py,sha256=m01mxML2UdD7y5rih_XNhNSCMzQTz7WQ_T1TeOcYlnE,49
|
17
|
-
mcp_code_indexer-1.
|
18
|
-
mcp_code_indexer-1.
|
19
|
-
mcp_code_indexer-1.
|
20
|
-
mcp_code_indexer-1.
|
21
|
-
mcp_code_indexer-1.
|
22
|
-
mcp_code_indexer-1.
|
19
|
+
mcp_code_indexer-1.2.0.dist-info/licenses/LICENSE,sha256=JN9dyPPgYwH9C-UjYM7FLNZjQ6BF7kAzpF3_4PwY4rY,1086
|
20
|
+
mcp_code_indexer-1.2.0.dist-info/METADATA,sha256=4P7egl42wv-bo6TGnnv-3RoPeg6SDZjGZYfBa6r2k_g,12201
|
21
|
+
mcp_code_indexer-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
22
|
+
mcp_code_indexer-1.2.0.dist-info/entry_points.txt,sha256=8HqWOw1Is7jOP1bvIgaSwouvT9z_Boe-9hd4NzyJOhY,68
|
23
|
+
mcp_code_indexer-1.2.0.dist-info/top_level.txt,sha256=yKYCM-gMGt-cnupGfAhnZaoEsROLB6DQ1KFUuyKx4rw,17
|
24
|
+
mcp_code_indexer-1.2.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|