mcp-code-indexer 1.1.2.tar.gz → 1.1.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {mcp_code_indexer-1.1.2/src/mcp_code_indexer.egg-info → mcp_code_indexer-1.1.5}/PKG-INFO +1 -1
  2. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/pyproject.toml +1 -1
  3. mcp_code_indexer-1.1.5/src/mcp_code_indexer/__main__.py +11 -0
  4. mcp_code_indexer-1.1.5/src/mcp_code_indexer/main.py +399 -0
  5. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/server/mcp_server.py +126 -21
  6. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5/src/mcp_code_indexer.egg-info}/PKG-INFO +1 -1
  7. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer.egg-info/SOURCES.txt +1 -0
  8. mcp_code_indexer-1.1.2/src/mcp_code_indexer/main.py +0 -134
  9. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/LICENSE +0 -0
  10. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/MANIFEST.in +0 -0
  11. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/README.md +0 -0
  12. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/docs/api-reference.md +0 -0
  13. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/docs/architecture.md +0 -0
  14. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/docs/configuration.md +0 -0
  15. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/docs/contributing.md +0 -0
  16. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/migrations/001_initial.sql +0 -0
  17. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/migrations/002_performance_indexes.sql +0 -0
  18. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/requirements.txt +0 -0
  19. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/setup.cfg +0 -0
  20. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/setup.py +0 -0
  21. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/__init__.py +0 -0
  22. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/database/__init__.py +0 -0
  23. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/database/database.py +0 -0
  24. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/database/models.py +0 -0
  25. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/error_handler.py +0 -0
  26. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/file_scanner.py +0 -0
  27. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/logging_config.py +0 -0
  28. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/merge_handler.py +0 -0
  29. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/middleware/__init__.py +0 -0
  30. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/middleware/error_middleware.py +0 -0
  31. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/server/__init__.py +0 -0
  32. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +0 -0
  33. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/token_counter.py +0 -0
  34. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/tools/__init__.py +0 -0
  35. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer.egg-info/dependency_links.txt +0 -0
  36. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer.egg-info/entry_points.txt +0 -0
  37. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer.egg-info/requires.txt +0 -0
  38. {mcp_code_indexer-1.1.2 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mcp-code-indexer
- Version: 1.1.2
+ Version: 1.1.5
  Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
  Author: MCP Code Indexer Contributors
  Maintainer: MCP Code Indexer Contributors
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
  [project]
  name = "mcp-code-indexer"
- version = "1.1.2"
+ version = "1.1.5"
  description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
  readme = "README.md"
  license = {text = "MIT"}
@@ -0,0 +1,11 @@
+ #!/usr/bin/env python3
+ """
+ MCP Code Indexer Package Main Module
+
+ Entry point for the mcp-code-indexer package when called with python -m.
+ """
+
+ from .main import cli_main
+
+ if __name__ == "__main__":
+ cli_main()
@@ -0,0 +1,399 @@
+ #!/usr/bin/env python3
+ """
+ MCP Code Indexer Package Main Module
+
+ Entry point for the mcp-code-indexer package when installed via pip.
+ """
+
+ import argparse
+ import asyncio
+ import json
+ import logging
+ import sys
+ from pathlib import Path
+ from typing import Any, Dict
+
+ from . import __version__
+ from .logging_config import setup_logging
+ from .error_handler import setup_error_handling
+
+
+ def parse_arguments() -> argparse.Namespace:
+ """Parse command line arguments."""
+ parser = argparse.ArgumentParser(
+ description="MCP Code Index Server - Track file descriptions across codebases",
+ prog="mcp-code-indexer"
+ )
+
+ parser.add_argument(
+ "--version",
+ action="version",
+ version=f"mcp-code-indexer {__version__}"
+ )
+
+ parser.add_argument(
+ "--token-limit",
+ type=int,
+ default=32000,
+ help="Maximum tokens before recommending search instead of full overview (default: 32000)"
+ )
+
+ parser.add_argument(
+ "--db-path",
+ type=str,
+ default="~/.mcp-code-index/tracker.db",
+ help="Path to SQLite database (default: ~/.mcp-code-index/tracker.db)"
+ )
+
+ parser.add_argument(
+ "--cache-dir",
+ type=str,
+ default="~/.mcp-code-index/cache",
+ help="Directory for caching token counts (default: ~/.mcp-code-index/cache)"
+ )
+
+ parser.add_argument(
+ "--log-level",
+ type=str,
+ choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
+ default="INFO",
+ help="Logging level (default: INFO)"
+ )
+
+ # Utility commands
+ parser.add_argument(
+ "--getprojects",
+ action="store_true",
+ help="List all projects with IDs, branches, and description counts"
+ )
+
+ parser.add_argument(
+ "--runcommand",
+ type=str,
+ help="Execute a command using JSON in MCP format (single or multi-line)"
+ )
+
+ parser.add_argument(
+ "--dumpdescriptions",
+ nargs="+",
+ metavar=("PROJECT_ID", "BRANCH"),
+ help="Export descriptions for a project. Usage: --dumpdescriptions PROJECT_ID [BRANCH]"
+ )
+
+ return parser.parse_args()
+
+
+ async def handle_getprojects(args: argparse.Namespace) -> None:
+ """Handle --getprojects command."""
+ try:
+ from .database.database import DatabaseManager
+
+ # Initialize database
+ db_path = Path(args.db_path).expanduser()
+ db_manager = DatabaseManager(db_path)
+ await db_manager.initialize()
+
+ # Get all projects
+ projects = await db_manager.get_all_projects()
+
+ if not projects:
+ print("No projects found.")
+ return
+
+ print("Projects:")
+ print("-" * 80)
+
+ for project in projects:
+ print(f"ID: {project.id}")
+ print(f"Name: {project.name}")
+ print(f"Remote Origin: {project.remote_origin or 'N/A'}")
+ print(f"Upstream Origin: {project.upstream_origin or 'N/A'}")
+
+ # Get branch information
+ try:
+ branch_counts = await db_manager.get_branch_file_counts(project.id)
+ if branch_counts:
+ print("Branches:")
+ for branch, count in branch_counts.items():
+ print(f" - {branch}: {count} descriptions")
+ else:
+ print("Branches: No descriptions found")
+ except Exception as e:
+ print(f"Branches: Error loading branch info - {e}")
+
+ print("-" * 80)
+
+ except Exception as e:
+ print(f"Error: {e}", file=sys.stderr)
+ sys.exit(1)
+
+
+ async def handle_runcommand(args: argparse.Namespace) -> None:
+ """Handle --runcommand command."""
+ from .server.mcp_server import MCPCodeIndexServer
+
+ try:
+ # Parse JSON (handle both single-line and multi-line)
+ json_data = json.loads(args.runcommand)
+ except json.JSONDecodeError as e:
+ print(f"Initial JSON parse failed: {e}", file=sys.stderr)
+
+ # Try to repair the JSON
+ try:
+ import re
+ repaired = args.runcommand
+
+ # Fix common issues
+ # Quote unquoted URLs and paths
+ url_pattern = r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9+.-]*://[^\s,}]+|/[^\s,}]*)'
+ repaired = re.sub(url_pattern, r'\1: "\2"', repaired)
+
+ # Quote unquoted values
+ unquoted_pattern = r'("[\w]+"):\s*([a-zA-Z0-9_-]+)(?=\s*[,}])'
+ repaired = re.sub(unquoted_pattern, r'\1: "\2"', repaired)
+
+ # Remove trailing commas
+ repaired = re.sub(r',(\s*[}\]])', r'\1', repaired)
+
+ json_data = json.loads(repaired)
+ print(f"JSON repaired successfully", file=sys.stderr)
+ print(f"Original: {args.runcommand}", file=sys.stderr)
+ print(f"Repaired: {repaired}", file=sys.stderr)
+ except json.JSONDecodeError as repair_error:
+ print(f"JSON repair also failed: {repair_error}", file=sys.stderr)
+ print(f"Original JSON: {args.runcommand}", file=sys.stderr)
+ sys.exit(1)
+
+ # Initialize server
+ db_path = Path(args.db_path).expanduser()
+ cache_dir = Path(args.cache_dir).expanduser()
+
+ server = MCPCodeIndexServer(
+ token_limit=args.token_limit,
+ db_path=db_path,
+ cache_dir=cache_dir
+ )
+ await server.initialize()
+
+ # Extract the tool call information from the JSON
+ if "method" in json_data and json_data["method"] == "tools/call":
+ tool_name = json_data["params"]["name"]
+ tool_arguments = json_data["params"]["arguments"]
+ elif "projectName" in json_data and "folderPath" in json_data:
+ # Auto-detect: user provided just arguments, try to infer the tool
+ if "filePath" in json_data and "description" in json_data:
+ tool_name = "update_file_description"
+ tool_arguments = json_data
+ print("Auto-detected tool: update_file_description", file=sys.stderr)
+ elif "branch" in json_data:
+ tool_name = "check_codebase_size"
+ tool_arguments = json_data
+ print("Auto-detected tool: check_codebase_size", file=sys.stderr)
+ else:
+ print("Error: Could not auto-detect tool from arguments. Please use full MCP format:", file=sys.stderr)
+ print('{"method": "tools/call", "params": {"name": "TOOL_NAME", "arguments": {...}}}', file=sys.stderr)
+ sys.exit(1)
+
+ try:
+ # Map tool names to handler methods
+ tool_handlers = {
+ "get_file_description": server._handle_get_file_description,
+ "update_file_description": server._handle_update_file_description,
+ "check_codebase_size": server._handle_check_codebase_size,
+ "find_missing_descriptions": server._handle_find_missing_descriptions,
+ "search_descriptions": server._handle_search_descriptions,
+ "get_codebase_overview": server._handle_get_codebase_overview,
+ "merge_branch_descriptions": server._handle_merge_branch_descriptions,
+ }
+
+ if tool_name not in tool_handlers:
+ error_result = {
+ "error": {
+ "code": -32601,
+ "message": f"Unknown tool: {tool_name}"
+ }
+ }
+ print(json.dumps(error_result, indent=2))
+ return
+
+ # Clean HTML entities from arguments before execution
+ def clean_html_entities(text: str) -> str:
+ if not text:
+ return text
+ import html
+ return html.unescape(text)
+
+ def clean_arguments(arguments: dict) -> dict:
+ cleaned = {}
+ for key, value in arguments.items():
+ if isinstance(value, str):
+ cleaned[key] = clean_html_entities(value)
+ elif isinstance(value, list):
+ cleaned[key] = [
+ clean_html_entities(item) if isinstance(item, str) else item
+ for item in value
+ ]
+ elif isinstance(value, dict):
+ cleaned[key] = clean_arguments(value)
+ else:
+ cleaned[key] = value
+ return cleaned
+
+ cleaned_tool_arguments = clean_arguments(tool_arguments)
+
+ # Execute the tool handler directly
+ result = await tool_handlers[tool_name](cleaned_tool_arguments)
+ print(json.dumps(result, indent=2, default=str))
+ except Exception as e:
+ error_result = {
+ "error": {
+ "code": -32603,
+ "message": str(e)
+ }
+ }
+ print(json.dumps(error_result, indent=2))
+ else:
+ print("Error: JSON must contain a valid MCP tool call", file=sys.stderr)
+ sys.exit(1)
+
+
+ async def handle_dumpdescriptions(args: argparse.Namespace) -> None:
+ """Handle --dumpdescriptions command."""
+ from .database.database import DatabaseManager
+ from .token_counter import TokenCounter
+
+ if len(args.dumpdescriptions) < 1:
+ print("Error: Project ID is required", file=sys.stderr)
+ sys.exit(1)
+
+ project_id = args.dumpdescriptions[0]
+ branch = args.dumpdescriptions[1] if len(args.dumpdescriptions) > 1 else None
+
+ # Initialize database and token counter
+ db_path = Path(args.db_path).expanduser()
+ db_manager = DatabaseManager(db_path)
+ await db_manager.initialize()
+
+ token_counter = TokenCounter(args.token_limit)
+
+ # Get file descriptions
+ if branch:
+ file_descriptions = await db_manager.get_all_file_descriptions(
+ project_id=project_id,
+ branch=branch
+ )
+ print(f"File descriptions for project {project_id}, branch {branch}:")
+ else:
+ file_descriptions = await db_manager.get_all_file_descriptions(
+ project_id=project_id
+ )
+ print(f"File descriptions for project {project_id} (all branches):")
+
+ print("=" * 80)
+
+ if not file_descriptions:
+ print("No descriptions found.")
+ total_tokens = 0
+ else:
+ total_tokens = 0
+ for desc in file_descriptions:
+ print(f"File: {desc.file_path}")
+ if branch is None:
+ print(f"Branch: {desc.branch}")
+ print(f"Description: {desc.description}")
+ print("-" * 40)
+
+ # Count tokens for this description
+ desc_tokens = token_counter.count_file_description_tokens(desc)
+ total_tokens += desc_tokens
+
+ print("=" * 80)
+ print(f"Total descriptions: {len(file_descriptions)}")
+ print(f"Total tokens: {total_tokens}")
+
+
+
+ async def main() -> None:
+ """Main entry point for the MCP server."""
+ args = parse_arguments()
+
+ # Handle utility commands
+ if args.getprojects:
+ await handle_getprojects(args)
+ return
+
+ if args.runcommand:
+ await handle_runcommand(args)
+ return
+
+ if args.dumpdescriptions:
+ await handle_dumpdescriptions(args)
+ return
+
+ # Setup structured logging
+ log_file = Path(args.cache_dir).expanduser() / "server.log" if args.cache_dir else None
+ logger = setup_logging(
+ log_level=args.log_level,
+ log_file=log_file,
+ enable_file_logging=True
+ )
+
+ # Setup error handling
+ error_handler = setup_error_handling(logger)
+
+ # Expand user paths
+ db_path = Path(args.db_path).expanduser()
+ cache_dir = Path(args.cache_dir).expanduser()
+
+ # Create directories if they don't exist
+ db_path.parent.mkdir(parents=True, exist_ok=True)
+ cache_dir.mkdir(parents=True, exist_ok=True)
+
+ # Log startup information to stderr (stdout reserved for MCP JSON-RPC)
+ logger.info("Starting MCP Code Index Server", extra={
+ "structured_data": {
+ "startup": {
+ "version": __version__,
+ "token_limit": args.token_limit,
+ "db_path": str(db_path),
+ "cache_dir": str(cache_dir),
+ "log_level": args.log_level
+ }
+ }
+ })
+
+ try:
+ # Import and run the MCP server
+ from .server.mcp_server import MCPCodeIndexServer
+
+ server = MCPCodeIndexServer(
+ token_limit=args.token_limit,
+ db_path=db_path,
+ cache_dir=cache_dir
+ )
+
+ await server.run()
+
+ except Exception as e:
+ error_handler.log_error(e, context={"phase": "startup"})
+ raise
+
+
+ def cli_main():
+ """Console script entry point."""
+ try:
+ asyncio.run(main())
+ except KeyboardInterrupt:
+ # For MCP servers, we should avoid stdout completely
+ # The server will log shutdown through stderr
+ pass
+ except Exception as e:
+ # Log critical errors to stderr, not stdout
+ import traceback
+ print(f"Server failed to start: {e}", file=sys.stderr)
+ print(f"Traceback: {traceback.format_exc()}", file=sys.stderr)
+ sys.exit(1)
+
+
+ if __name__ == "__main__":
+ cli_main()
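
Note: for orientation, below is a minimal sketch (with made-up project values) of the two payload shapes handle_runcommand() accepts via --runcommand: the full MCP tools/call envelope, and bare tool arguments from which the tool is inferred (filePath plus description selects update_file_description; otherwise a branch key selects check_codebase_size).

import json

# Full MCP format: the tool name is read from params.name (hypothetical values).
full_call = {
    "method": "tools/call",
    "params": {
        "name": "check_codebase_size",
        "arguments": {"projectName": "demo", "folderPath": "/tmp/demo", "branch": "main"},
    },
}

# Bare arguments: handle_runcommand() infers check_codebase_size from the "branch" key.
bare_args = {"projectName": "demo", "folderPath": "/tmp/demo", "branch": "main"}

print(json.dumps(full_call))
print(json.dumps(bare_args))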
@@ -7,8 +7,10 @@ for file description management tools.
 
  import asyncio
  import hashlib
+ import html
  import json
  import logging
+ import re
  import uuid
  from datetime import datetime
  from pathlib import Path
@@ -85,6 +87,102 @@ class MCPCodeIndexServer:
  extra={"structured_data": {"initialization": {"token_limit": token_limit}}}
  )
 
+ def _clean_html_entities(self, text: str) -> str:
+ """
+ Clean HTML entities from text to prevent encoding issues.
+
+ Args:
+ text: Text that may contain HTML entities
+
+ Returns:
+ Text with HTML entities decoded to proper characters
+ """
+ if not text:
+ return text
+
+ # Decode HTML entities like &lt; &gt; &amp; etc.
+ return html.unescape(text)
+
+ def _clean_arguments(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Clean HTML entities from all text arguments.
+
+ Args:
+ arguments: Dictionary of arguments to clean
+
+ Returns:
+ Dictionary with HTML entities decoded in all string values
+ """
+ cleaned = {}
+
+ for key, value in arguments.items():
+ if isinstance(value, str):
+ cleaned[key] = self._clean_html_entities(value)
+ elif isinstance(value, list):
+ # Clean strings in lists (like conflict resolutions)
+ cleaned[key] = [
+ self._clean_html_entities(item) if isinstance(item, str) else item
+ for item in value
+ ]
+ elif isinstance(value, dict):
+ # Recursively clean nested dictionaries
+ cleaned[key] = self._clean_arguments(value)
+ else:
+ # Pass through other types unchanged
+ cleaned[key] = value
+
+ return cleaned
+
+ def _parse_json_robust(self, json_str: str) -> Dict[str, Any]:
+ """
+ Parse JSON with automatic repair for common issues.
+
+ Args:
+ json_str: JSON string that may have formatting issues
+
+ Returns:
+ Parsed JSON dictionary
+
+ Raises:
+ ValueError: If JSON cannot be parsed even after repair attempts
+ """
+ # First try normal parsing
+ try:
+ return json.loads(json_str)
+ except json.JSONDecodeError as original_error:
+ logger.warning(f"Initial JSON parse failed: {original_error}")
+
+ # Try to repair common issues
+ repaired = json_str
+
+ # Fix 1: Quote unquoted URLs and paths
+ # Look for patterns like: "key": http://... or "key": /path/...
+ url_pattern = r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9+.-]*://[^\s,}]+|/[^\s,}]*)'
+ repaired = re.sub(url_pattern, r'\1: "\2"', repaired)
+
+ # Fix 2: Quote unquoted boolean-like strings
+ # Look for: "key": true-ish-string or "key": false-ish-string
+ bool_pattern = r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9_-]*[a-zA-Z0-9])(?=\s*[,}])'
+ repaired = re.sub(bool_pattern, r'\1: "\2"', repaired)
+
+ # Fix 3: Remove trailing commas
+ repaired = re.sub(r',(\s*[}\]])', r'\1', repaired)
+
+ # Fix 4: Ensure proper string quoting for common unquoted values
+ # Handle cases like: "key": value (where value should be "value")
+ unquoted_pattern = r'("[\w]+"):\s*([a-zA-Z0-9_-]+)(?=\s*[,}])'
+ repaired = re.sub(unquoted_pattern, r'\1: "\2"', repaired)
+
+ try:
+ result = json.loads(repaired)
+ logger.info(f"Successfully repaired JSON. Original: {json_str[:100]}...")
+ logger.info(f"Repaired: {repaired[:100]}...")
+ return result
+ except json.JSONDecodeError as repair_error:
+ logger.error(f"JSON repair failed. Original: {json_str}")
+ logger.error(f"Repaired attempt: {repaired}")
+ raise ValueError(f"Could not parse JSON even after repair attempts. Original error: {original_error}, Repair error: {repair_error}")
+
  async def initialize(self) -> None:
  """Initialize database and other resources."""
  await self.db_manager.initialize()
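
Note: as a quick standalone illustration (hypothetical input string), three of the repair passes used above turn a sloppy payload into parseable JSON: bare URLs are quoted, bare identifier values are quoted, and trailing commas are dropped.

import json
import re

broken = '{"projectName": demo, "remoteOrigin": https://example.com/repo.git,}'

repaired = broken
# Quote unquoted URLs/paths, then unquoted identifier values, then drop trailing commas.
repaired = re.sub(r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9+.-]*://[^\s,}]+|/[^\s,}]*)', r'\1: "\2"', repaired)
repaired = re.sub(r'("[\w]+"):\s*([a-zA-Z0-9_-]+)(?=\s*[,}])', r'\1: "\2"', repaired)
repaired = re.sub(r',(\s*[}\]])', r'\1', repaired)

print(json.loads(repaired))
# {'projectName': 'demo', 'remoteOrigin': 'https://example.com/repo.git'}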
@@ -116,11 +214,11 @@ class MCPCodeIndexServer:
  "description": "Git branch name (e.g., 'main', 'develop')"
  },
  "remoteOrigin": {
- "type": ["string", "null"],
+ "type": "string",
  "description": "Git remote origin URL if available"
  },
  "upstreamOrigin": {
- "type": ["string", "null"],
+ "type": "string",
  "description": "Upstream repository URL if this is a fork"
  },
  "filePath": {
@@ -140,11 +238,11 @@ class MCPCodeIndexServer:
  "projectName": {"type": "string", "description": "The name of the project"},
  "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
  "branch": {"type": "string", "description": "Git branch name"},
- "remoteOrigin": {"type": ["string", "null"], "description": "Git remote origin URL if available"},
- "upstreamOrigin": {"type": ["string", "null"], "description": "Upstream repository URL if this is a fork"},
+ "remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
+ "upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
  "filePath": {"type": "string", "description": "Relative path to the file from project root"},
  "description": {"type": "string", "description": "Detailed description of the file's contents"},
- "fileHash": {"type": ["string", "null"], "description": "SHA-256 hash of the file contents (optional)"}
+ "fileHash": {"type": "string", "description": "SHA-256 hash of the file contents (optional)"}
  },
  "required": ["projectName", "folderPath", "branch", "filePath", "description"]
  }
@@ -158,8 +256,9 @@ class MCPCodeIndexServer:
  "projectName": {"type": "string", "description": "The name of the project"},
  "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
  "branch": {"type": "string", "description": "Git branch name"},
- "remoteOrigin": {"type": ["string", "null"], "description": "Git remote origin URL if available"},
- "upstreamOrigin": {"type": ["string", "null"], "description": "Upstream repository URL if this is a fork"}
+ "remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
+ "upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
+ "tokenLimit": {"type": "integer", "description": "Optional token limit override (defaults to server configuration)"}
  },
  "required": ["projectName", "folderPath", "branch"]
  }
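
Note: a brief sketch (assumed values only) of how the new optional tokenLimit field interacts with the check_codebase_size handler change further below: the handler reads tokenLimit from the call arguments, falls back to the server-wide --token-limit, and classifies the codebase itself.

# Assumed values for illustration only.
server_default_limit = 32000
arguments = {"projectName": "demo", "folderPath": "/tmp/demo", "branch": "main", "tokenLimit": 50000}
total_tokens = 41000  # stand-in for token_counter.calculate_codebase_tokens(...)

token_limit = arguments.get("tokenLimit", server_default_limit)
is_large = total_tokens > token_limit
recommendation = "use_search" if is_large else "use_overview"
print(token_limit, is_large, recommendation)  # 50000 False use_overview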
@@ -173,8 +272,8 @@ class MCPCodeIndexServer:
  "projectName": {"type": "string", "description": "The name of the project"},
  "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
  "branch": {"type": "string", "description": "Git branch name"},
- "remoteOrigin": {"type": ["string", "null"], "description": "Git remote origin URL if available"},
- "upstreamOrigin": {"type": ["string", "null"], "description": "Upstream repository URL if this is a fork"},
+ "remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
+ "upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
  "limit": {"type": "integer", "description": "Maximum number of missing files to return (optional)"}
  },
  "required": ["projectName", "folderPath", "branch"]
@@ -189,8 +288,8 @@ class MCPCodeIndexServer:
  "projectName": {"type": "string", "description": "The name of the project"},
  "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
  "branch": {"type": "string", "description": "Git branch to search in"},
- "remoteOrigin": {"type": ["string", "null"], "description": "Git remote origin URL if available"},
- "upstreamOrigin": {"type": ["string", "null"], "description": "Upstream repository URL if this is a fork"},
+ "remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
+ "upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
  "query": {"type": "string", "description": "Search query (e.g., 'authentication middleware', 'database models')"},
  "maxResults": {"type": "integer", "default": 20, "description": "Maximum number of results to return"}
  },
@@ -206,8 +305,8 @@ class MCPCodeIndexServer:
  "projectName": {"type": "string", "description": "The name of the project"},
  "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
  "branch": {"type": "string", "description": "Git branch name"},
- "remoteOrigin": {"type": ["string", "null"], "description": "Git remote origin URL if available"},
- "upstreamOrigin": {"type": ["string", "null"], "description": "Upstream repository URL if this is a fork"}
+ "remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
+ "upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"}
  },
  "required": ["projectName", "folderPath", "branch"]
  }
@@ -220,8 +319,8 @@ class MCPCodeIndexServer:
  "properties": {
  "projectName": {"type": "string", "description": "The name of the project"},
  "folderPath": {"type": "string", "description": "Absolute path to the project folder"},
- "remoteOrigin": {"type": ["string", "null"], "description": "Git remote origin URL"},
- "upstreamOrigin": {"type": ["string", "null"], "description": "Upstream repository URL if this is a fork"},
+ "remoteOrigin": {"type": "string", "description": "Git remote origin URL"},
+ "upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
  "sourceBranch": {"type": "string", "description": "Branch to merge from (e.g., 'feature/new-ui')"},
  "targetBranch": {"type": "string", "description": "Branch to merge into (e.g., 'main')"},
  "conflictResolutions": {
@@ -269,7 +368,10 @@ class MCPCodeIndexServer:
 
  async def _execute_tool_handler(self, handler, arguments: Dict[str, Any]) -> List[types.TextContent]:
  """Execute a tool handler and format the result."""
- result = await handler(arguments)
+ # Clean HTML entities from all arguments before processing
+ cleaned_arguments = self._clean_arguments(arguments)
+
+ result = await handler(cleaned_arguments)
 
  return [types.TextContent(
  type="text",
@@ -409,8 +511,8 @@ class MCPCodeIndexServer:
  if not scanner.is_valid_project_directory():
  return False
 
- current_files = scanner.scan_files()
- current_basenames = {Path(f).name for f in current_files}
+ current_files = scanner.scan_directory()
+ current_basenames = {f.name for f in current_files}
 
  if not current_basenames:
  return False
@@ -584,16 +686,19 @@ class MCPCodeIndexServer:
  branch=resolved_branch
  )
 
+ # Use provided token limit or fall back to server default
+ token_limit = arguments.get("tokenLimit", self.token_limit)
+
  # Calculate total tokens
  total_tokens = self.token_counter.calculate_codebase_tokens(file_descriptions)
- is_large = self.token_counter.is_large_codebase(total_tokens)
- recommendation = self.token_counter.get_recommendation(total_tokens)
+ is_large = total_tokens > token_limit
+ recommendation = "use_search" if is_large else "use_overview"
 
  return {
  "totalTokens": total_tokens,
  "isLarge": is_large,
  "recommendation": recommendation,
- "tokenLimit": self.token_counter.token_limit,
+ "tokenLimit": token_limit,
  "totalFiles": len(file_descriptions)
  }
 
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mcp-code-indexer
- Version: 1.1.2
+ Version: 1.1.5
  Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
  Author: MCP Code Indexer Contributors
  Maintainer: MCP Code Indexer Contributors
@@ -11,6 +11,7 @@ docs/contributing.md
  migrations/001_initial.sql
  migrations/002_performance_indexes.sql
  src/mcp_code_indexer/__init__.py
+ src/mcp_code_indexer/__main__.py
  src/mcp_code_indexer/error_handler.py
  src/mcp_code_indexer/file_scanner.py
  src/mcp_code_indexer/logging_config.py
@@ -1,134 +0,0 @@
- #!/usr/bin/env python3
- """
- MCP Code Indexer Package Main Module
-
- Entry point for the mcp-code-indexer package when installed via pip.
- """
-
- import argparse
- import asyncio
- import logging
- import sys
- from pathlib import Path
-
- from . import __version__
- from .logging_config import setup_logging
- from .error_handler import setup_error_handling
-
-
- def parse_arguments() -> argparse.Namespace:
- """Parse command line arguments."""
- parser = argparse.ArgumentParser(
- description="MCP Code Index Server - Track file descriptions across codebases",
- prog="mcp-code-indexer"
- )
-
- parser.add_argument(
- "--version",
- action="version",
- version=f"mcp-code-indexer {__version__}"
- )
-
- parser.add_argument(
- "--token-limit",
- type=int,
- default=32000,
- help="Maximum tokens before recommending search instead of full overview (default: 32000)"
- )
-
- parser.add_argument(
- "--db-path",
- type=str,
- default="~/.mcp-code-index/tracker.db",
- help="Path to SQLite database (default: ~/.mcp-code-index/tracker.db)"
- )
-
- parser.add_argument(
- "--cache-dir",
- type=str,
- default="~/.mcp-code-index/cache",
- help="Directory for caching token counts (default: ~/.mcp-code-index/cache)"
- )
-
- parser.add_argument(
- "--log-level",
- type=str,
- choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
- default="INFO",
- help="Logging level (default: INFO)"
- )
-
- return parser.parse_args()
-
-
- async def main() -> None:
- """Main entry point for the MCP server."""
- args = parse_arguments()
-
- # Setup structured logging
- log_file = Path(args.cache_dir).expanduser() / "server.log" if args.cache_dir else None
- logger = setup_logging(
- log_level=args.log_level,
- log_file=log_file,
- enable_file_logging=True
- )
-
- # Setup error handling
- error_handler = setup_error_handling(logger)
-
- # Expand user paths
- db_path = Path(args.db_path).expanduser()
- cache_dir = Path(args.cache_dir).expanduser()
-
- # Create directories if they don't exist
- db_path.parent.mkdir(parents=True, exist_ok=True)
- cache_dir.mkdir(parents=True, exist_ok=True)
-
- # Log startup information to stderr (stdout reserved for MCP JSON-RPC)
- logger.info("Starting MCP Code Index Server", extra={
- "structured_data": {
- "startup": {
- "version": __version__,
- "token_limit": args.token_limit,
- "db_path": str(db_path),
- "cache_dir": str(cache_dir),
- "log_level": args.log_level
- }
- }
- })
-
- try:
- # Import and run the MCP server
- from .server.mcp_server import MCPCodeIndexServer
-
- server = MCPCodeIndexServer(
- token_limit=args.token_limit,
- db_path=db_path,
- cache_dir=cache_dir
- )
-
- await server.run()
-
- except Exception as e:
- error_handler.log_error(e, context={"phase": "startup"})
- raise
-
-
- def cli_main():
- """Console script entry point."""
- try:
- asyncio.run(main())
- except KeyboardInterrupt:
- # For MCP servers, we should avoid stdout completely
- # The server will log shutdown through stderr
- pass
- except Exception as e:
- # Log critical errors to stderr, not stdout
- import traceback
- print(f"Server failed to start: {e}", file=sys.stderr)
- print(f"Traceback: {traceback.format_exc()}", file=sys.stderr)
- sys.exit(1)
-
-
- if __name__ == "__main__":
- cli_main()