mcp-code-indexer 1.1.3__tar.gz → 1.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {mcp_code_indexer-1.1.3/src/mcp_code_indexer.egg-info → mcp_code_indexer-1.1.5}/PKG-INFO +1 -1
  2. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/pyproject.toml +1 -1
  3. mcp_code_indexer-1.1.5/src/mcp_code_indexer/__main__.py +11 -0
  4. mcp_code_indexer-1.1.5/src/mcp_code_indexer/main.py +399 -0
  5. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/server/mcp_server.py +119 -18
  6. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5/src/mcp_code_indexer.egg-info}/PKG-INFO +1 -1
  7. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer.egg-info/SOURCES.txt +1 -0
  8. mcp_code_indexer-1.1.3/src/mcp_code_indexer/main.py +0 -134
  9. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/LICENSE +0 -0
  10. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/MANIFEST.in +0 -0
  11. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/README.md +0 -0
  12. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/docs/api-reference.md +0 -0
  13. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/docs/architecture.md +0 -0
  14. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/docs/configuration.md +0 -0
  15. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/docs/contributing.md +0 -0
  16. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/migrations/001_initial.sql +0 -0
  17. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/migrations/002_performance_indexes.sql +0 -0
  18. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/requirements.txt +0 -0
  19. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/setup.cfg +0 -0
  20. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/setup.py +0 -0
  21. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/__init__.py +0 -0
  22. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/database/__init__.py +0 -0
  23. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/database/database.py +0 -0
  24. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/database/models.py +0 -0
  25. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/error_handler.py +0 -0
  26. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/file_scanner.py +0 -0
  27. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/logging_config.py +0 -0
  28. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/merge_handler.py +0 -0
  29. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/middleware/__init__.py +0 -0
  30. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/middleware/error_middleware.py +0 -0
  31. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/server/__init__.py +0 -0
  32. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +0 -0
  33. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/token_counter.py +0 -0
  34. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer/tools/__init__.py +0 -0
  35. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer.egg-info/dependency_links.txt +0 -0
  36. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer.egg-info/entry_points.txt +0 -0
  37. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer.egg-info/requires.txt +0 -0
  38. {mcp_code_indexer-1.1.3 → mcp_code_indexer-1.1.5}/src/mcp_code_indexer.egg-info/top_level.txt +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mcp-code-indexer
- Version: 1.1.3
+ Version: 1.1.5
  Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
  Author: MCP Code Indexer Contributors
  Maintainer: MCP Code Indexer Contributors

pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "mcp-code-indexer"
- version = "1.1.3"
+ version = "1.1.5"
  description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
  readme = "README.md"
  license = {text = "MIT"}

src/mcp_code_indexer/__main__.py (new file)
@@ -0,0 +1,11 @@
+ #!/usr/bin/env python3
+ """
+ MCP Code Indexer Package Main Module
+
+ Entry point for the mcp-code-indexer package when called with python -m.
+ """
+
+ from .main import cli_main
+
+ if __name__ == "__main__":
+     cli_main()
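
The new __main__.py simply re-exports the console-script entry point, so python -m mcp_code_indexer and the installed mcp-code-indexer command both end up in cli_main(). As a minimal sketch of what the -m invocation amounts to (using the standard-library runpy module; the --version flag here is only an example, not part of the diff):

    import runpy
    import sys

    # Emulate `python -m mcp_code_indexer --version`: the interpreter runs the package's
    # __main__.py, which imports and calls cli_main() from mcp_code_indexer.main.
    sys.argv = ["mcp-code-indexer", "--version"]
    runpy.run_module("mcp_code_indexer", run_name="__main__")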

src/mcp_code_indexer/main.py (new file)
@@ -0,0 +1,399 @@
+ #!/usr/bin/env python3
+ """
+ MCP Code Indexer Package Main Module
+
+ Entry point for the mcp-code-indexer package when installed via pip.
+ """
+
+ import argparse
+ import asyncio
+ import json
+ import logging
+ import sys
+ from pathlib import Path
+ from typing import Any, Dict
+
+ from . import __version__
+ from .logging_config import setup_logging
+ from .error_handler import setup_error_handling
+
+
+ def parse_arguments() -> argparse.Namespace:
+     """Parse command line arguments."""
+     parser = argparse.ArgumentParser(
+         description="MCP Code Index Server - Track file descriptions across codebases",
+         prog="mcp-code-indexer"
+     )
+
+     parser.add_argument(
+         "--version",
+         action="version",
+         version=f"mcp-code-indexer {__version__}"
+     )
+
+     parser.add_argument(
+         "--token-limit",
+         type=int,
+         default=32000,
+         help="Maximum tokens before recommending search instead of full overview (default: 32000)"
+     )
+
+     parser.add_argument(
+         "--db-path",
+         type=str,
+         default="~/.mcp-code-index/tracker.db",
+         help="Path to SQLite database (default: ~/.mcp-code-index/tracker.db)"
+     )
+
+     parser.add_argument(
+         "--cache-dir",
+         type=str,
+         default="~/.mcp-code-index/cache",
+         help="Directory for caching token counts (default: ~/.mcp-code-index/cache)"
+     )
+
+     parser.add_argument(
+         "--log-level",
+         type=str,
+         choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
+         default="INFO",
+         help="Logging level (default: INFO)"
+     )
+
+     # Utility commands
+     parser.add_argument(
+         "--getprojects",
+         action="store_true",
+         help="List all projects with IDs, branches, and description counts"
+     )
+
+     parser.add_argument(
+         "--runcommand",
+         type=str,
+         help="Execute a command using JSON in MCP format (single or multi-line)"
+     )
+
+     parser.add_argument(
+         "--dumpdescriptions",
+         nargs="+",
+         metavar=("PROJECT_ID", "BRANCH"),
+         help="Export descriptions for a project. Usage: --dumpdescriptions PROJECT_ID [BRANCH]"
+     )
+
+     return parser.parse_args()
+
+
+ async def handle_getprojects(args: argparse.Namespace) -> None:
+     """Handle --getprojects command."""
+     try:
+         from .database.database import DatabaseManager
+
+         # Initialize database
+         db_path = Path(args.db_path).expanduser()
+         db_manager = DatabaseManager(db_path)
+         await db_manager.initialize()
+
+         # Get all projects
+         projects = await db_manager.get_all_projects()
+
+         if not projects:
+             print("No projects found.")
+             return
+
+         print("Projects:")
+         print("-" * 80)
+
+         for project in projects:
+             print(f"ID: {project.id}")
+             print(f"Name: {project.name}")
+             print(f"Remote Origin: {project.remote_origin or 'N/A'}")
+             print(f"Upstream Origin: {project.upstream_origin or 'N/A'}")
+
+             # Get branch information
+             try:
+                 branch_counts = await db_manager.get_branch_file_counts(project.id)
+                 if branch_counts:
+                     print("Branches:")
+                     for branch, count in branch_counts.items():
+                         print(f" - {branch}: {count} descriptions")
+                 else:
+                     print("Branches: No descriptions found")
+             except Exception as e:
+                 print(f"Branches: Error loading branch info - {e}")
+
+             print("-" * 80)
+
+     except Exception as e:
+         print(f"Error: {e}", file=sys.stderr)
+         sys.exit(1)
+
+
+ async def handle_runcommand(args: argparse.Namespace) -> None:
+     """Handle --runcommand command."""
+     from .server.mcp_server import MCPCodeIndexServer
+
+     try:
+         # Parse JSON (handle both single-line and multi-line)
+         json_data = json.loads(args.runcommand)
+     except json.JSONDecodeError as e:
+         print(f"Initial JSON parse failed: {e}", file=sys.stderr)
+
+         # Try to repair the JSON
+         try:
+             import re
+             repaired = args.runcommand
+
+             # Fix common issues
+             # Quote unquoted URLs and paths
+             url_pattern = r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9+.-]*://[^\s,}]+|/[^\s,}]*)'
+             repaired = re.sub(url_pattern, r'\1: "\2"', repaired)
+
+             # Quote unquoted values
+             unquoted_pattern = r'("[\w]+"):\s*([a-zA-Z0-9_-]+)(?=\s*[,}])'
+             repaired = re.sub(unquoted_pattern, r'\1: "\2"', repaired)
+
+             # Remove trailing commas
+             repaired = re.sub(r',(\s*[}\]])', r'\1', repaired)
+
+             json_data = json.loads(repaired)
+             print(f"JSON repaired successfully", file=sys.stderr)
+             print(f"Original: {args.runcommand}", file=sys.stderr)
+             print(f"Repaired: {repaired}", file=sys.stderr)
+         except json.JSONDecodeError as repair_error:
+             print(f"JSON repair also failed: {repair_error}", file=sys.stderr)
+             print(f"Original JSON: {args.runcommand}", file=sys.stderr)
+             sys.exit(1)
+
+     # Initialize server
+     db_path = Path(args.db_path).expanduser()
+     cache_dir = Path(args.cache_dir).expanduser()
+
+     server = MCPCodeIndexServer(
+         token_limit=args.token_limit,
+         db_path=db_path,
+         cache_dir=cache_dir
+     )
+     await server.initialize()
+
+     # Extract the tool call information from the JSON
+     if "method" in json_data and json_data["method"] == "tools/call":
+         tool_name = json_data["params"]["name"]
+         tool_arguments = json_data["params"]["arguments"]
+     elif "projectName" in json_data and "folderPath" in json_data:
+         # Auto-detect: user provided just arguments, try to infer the tool
+         if "filePath" in json_data and "description" in json_data:
+             tool_name = "update_file_description"
+             tool_arguments = json_data
+             print("Auto-detected tool: update_file_description", file=sys.stderr)
+         elif "branch" in json_data:
+             tool_name = "check_codebase_size"
+             tool_arguments = json_data
+             print("Auto-detected tool: check_codebase_size", file=sys.stderr)
+         else:
+             print("Error: Could not auto-detect tool from arguments. Please use full MCP format:", file=sys.stderr)
+             print('{"method": "tools/call", "params": {"name": "TOOL_NAME", "arguments": {...}}}', file=sys.stderr)
+             sys.exit(1)
+
+         try:
+             # Map tool names to handler methods
+             tool_handlers = {
+                 "get_file_description": server._handle_get_file_description,
+                 "update_file_description": server._handle_update_file_description,
+                 "check_codebase_size": server._handle_check_codebase_size,
+                 "find_missing_descriptions": server._handle_find_missing_descriptions,
+                 "search_descriptions": server._handle_search_descriptions,
+                 "get_codebase_overview": server._handle_get_codebase_overview,
+                 "merge_branch_descriptions": server._handle_merge_branch_descriptions,
+             }
+
+             if tool_name not in tool_handlers:
+                 error_result = {
+                     "error": {
+                         "code": -32601,
+                         "message": f"Unknown tool: {tool_name}"
+                     }
+                 }
+                 print(json.dumps(error_result, indent=2))
+                 return
+
+             # Clean HTML entities from arguments before execution
+             def clean_html_entities(text: str) -> str:
+                 if not text:
+                     return text
+                 import html
+                 return html.unescape(text)
+
+             def clean_arguments(arguments: dict) -> dict:
+                 cleaned = {}
+                 for key, value in arguments.items():
+                     if isinstance(value, str):
+                         cleaned[key] = clean_html_entities(value)
+                     elif isinstance(value, list):
+                         cleaned[key] = [
+                             clean_html_entities(item) if isinstance(item, str) else item
+                             for item in value
+                         ]
+                     elif isinstance(value, dict):
+                         cleaned[key] = clean_arguments(value)
+                     else:
+                         cleaned[key] = value
+                 return cleaned
+
+             cleaned_tool_arguments = clean_arguments(tool_arguments)
+
+             # Execute the tool handler directly
+             result = await tool_handlers[tool_name](cleaned_tool_arguments)
+             print(json.dumps(result, indent=2, default=str))
+         except Exception as e:
+             error_result = {
+                 "error": {
+                     "code": -32603,
+                     "message": str(e)
+                 }
+             }
+             print(json.dumps(error_result, indent=2))
+     else:
+         print("Error: JSON must contain a valid MCP tool call", file=sys.stderr)
+         sys.exit(1)
+
+
+ async def handle_dumpdescriptions(args: argparse.Namespace) -> None:
+     """Handle --dumpdescriptions command."""
+     from .database.database import DatabaseManager
+     from .token_counter import TokenCounter
+
+     if len(args.dumpdescriptions) < 1:
+         print("Error: Project ID is required", file=sys.stderr)
+         sys.exit(1)
+
+     project_id = args.dumpdescriptions[0]
+     branch = args.dumpdescriptions[1] if len(args.dumpdescriptions) > 1 else None
+
+     # Initialize database and token counter
+     db_path = Path(args.db_path).expanduser()
+     db_manager = DatabaseManager(db_path)
+     await db_manager.initialize()
+
+     token_counter = TokenCounter(args.token_limit)
+
+     # Get file descriptions
+     if branch:
+         file_descriptions = await db_manager.get_all_file_descriptions(
+             project_id=project_id,
+             branch=branch
+         )
+         print(f"File descriptions for project {project_id}, branch {branch}:")
+     else:
+         file_descriptions = await db_manager.get_all_file_descriptions(
+             project_id=project_id
+         )
+         print(f"File descriptions for project {project_id} (all branches):")
+
+     print("=" * 80)
+
+     if not file_descriptions:
+         print("No descriptions found.")
+         total_tokens = 0
+     else:
+         total_tokens = 0
+         for desc in file_descriptions:
+             print(f"File: {desc.file_path}")
+             if branch is None:
+                 print(f"Branch: {desc.branch}")
+             print(f"Description: {desc.description}")
+             print("-" * 40)
+
+             # Count tokens for this description
+             desc_tokens = token_counter.count_file_description_tokens(desc)
+             total_tokens += desc_tokens
+
+     print("=" * 80)
+     print(f"Total descriptions: {len(file_descriptions)}")
+     print(f"Total tokens: {total_tokens}")
+
+
+
+ async def main() -> None:
+     """Main entry point for the MCP server."""
+     args = parse_arguments()
+
+     # Handle utility commands
+     if args.getprojects:
+         await handle_getprojects(args)
+         return
+
+     if args.runcommand:
+         await handle_runcommand(args)
+         return
+
+     if args.dumpdescriptions:
+         await handle_dumpdescriptions(args)
+         return
+
+     # Setup structured logging
+     log_file = Path(args.cache_dir).expanduser() / "server.log" if args.cache_dir else None
+     logger = setup_logging(
+         log_level=args.log_level,
+         log_file=log_file,
+         enable_file_logging=True
+     )
+
+     # Setup error handling
+     error_handler = setup_error_handling(logger)
+
+     # Expand user paths
+     db_path = Path(args.db_path).expanduser()
+     cache_dir = Path(args.cache_dir).expanduser()
+
+     # Create directories if they don't exist
+     db_path.parent.mkdir(parents=True, exist_ok=True)
+     cache_dir.mkdir(parents=True, exist_ok=True)
+
+     # Log startup information to stderr (stdout reserved for MCP JSON-RPC)
+     logger.info("Starting MCP Code Index Server", extra={
+         "structured_data": {
+             "startup": {
+                 "version": __version__,
+                 "token_limit": args.token_limit,
+                 "db_path": str(db_path),
+                 "cache_dir": str(cache_dir),
+                 "log_level": args.log_level
+             }
+         }
+     })
+
+     try:
+         # Import and run the MCP server
+         from .server.mcp_server import MCPCodeIndexServer
+
+         server = MCPCodeIndexServer(
+             token_limit=args.token_limit,
+             db_path=db_path,
+             cache_dir=cache_dir
+         )
+
+         await server.run()
+
+     except Exception as e:
+         error_handler.log_error(e, context={"phase": "startup"})
+         raise
+
+
+ def cli_main():
+     """Console script entry point."""
+     try:
+         asyncio.run(main())
+     except KeyboardInterrupt:
+         # For MCP servers, we should avoid stdout completely
+         # The server will log shutdown through stderr
+         pass
+     except Exception as e:
+         # Log critical errors to stderr, not stdout
+         import traceback
+         print(f"Server failed to start: {e}", file=sys.stderr)
+         print(f"Traceback: {traceback.format_exc()}", file=sys.stderr)
+         sys.exit(1)
+
+
+ if __name__ == "__main__":
+     cli_main()
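
The --runcommand path above is easiest to see with a concrete payload. The sketch below builds the full MCP-format JSON that handle_runcommand accepts without auto-detection; the tool name and argument keys come from the tool schemas in this release, while the project values are invented placeholders:

    import json

    payload = {
        "method": "tools/call",
        "params": {
            "name": "check_codebase_size",
            "arguments": {
                "projectName": "example-project",
                "folderPath": "/tmp/example-project",
                "branch": "main",
            },
        },
    }

    # The serialized form is what gets passed on the command line, e.g.:
    #   mcp-code-indexer --runcommand '<single-line JSON printed below>'
    print(json.dumps(payload))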

src/mcp_code_indexer/server/mcp_server.py
@@ -7,8 +7,10 @@ for file description management tools.

  import asyncio
  import hashlib
+ import html
  import json
  import logging
+ import re
  import uuid
  from datetime import datetime
  from pathlib import Path

@@ -85,6 +87,102 @@ class MCPCodeIndexServer:
              extra={"structured_data": {"initialization": {"token_limit": token_limit}}}
          )

+     def _clean_html_entities(self, text: str) -> str:
+         """
+         Clean HTML entities from text to prevent encoding issues.
+
+         Args:
+             text: Text that may contain HTML entities
+
+         Returns:
+             Text with HTML entities decoded to proper characters
+         """
+         if not text:
+             return text
+
+         # Decode HTML entities like &lt; &gt; &amp; etc.
+         return html.unescape(text)
+
+     def _clean_arguments(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+         """
+         Clean HTML entities from all text arguments.
+
+         Args:
+             arguments: Dictionary of arguments to clean
+
+         Returns:
+             Dictionary with HTML entities decoded in all string values
+         """
+         cleaned = {}
+
+         for key, value in arguments.items():
+             if isinstance(value, str):
+                 cleaned[key] = self._clean_html_entities(value)
+             elif isinstance(value, list):
+                 # Clean strings in lists (like conflict resolutions)
+                 cleaned[key] = [
+                     self._clean_html_entities(item) if isinstance(item, str) else item
+                     for item in value
+                 ]
+             elif isinstance(value, dict):
+                 # Recursively clean nested dictionaries
+                 cleaned[key] = self._clean_arguments(value)
+             else:
+                 # Pass through other types unchanged
+                 cleaned[key] = value
+
+         return cleaned
+
+     def _parse_json_robust(self, json_str: str) -> Dict[str, Any]:
+         """
+         Parse JSON with automatic repair for common issues.
+
+         Args:
+             json_str: JSON string that may have formatting issues
+
+         Returns:
+             Parsed JSON dictionary
+
+         Raises:
+             ValueError: If JSON cannot be parsed even after repair attempts
+         """
+         # First try normal parsing
+         try:
+             return json.loads(json_str)
+         except json.JSONDecodeError as original_error:
+             logger.warning(f"Initial JSON parse failed: {original_error}")
+
+             # Try to repair common issues
+             repaired = json_str
+
+             # Fix 1: Quote unquoted URLs and paths
+             # Look for patterns like: "key": http://... or "key": /path/...
+             url_pattern = r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9+.-]*://[^\s,}]+|/[^\s,}]*)'
+             repaired = re.sub(url_pattern, r'\1: "\2"', repaired)
+
+             # Fix 2: Quote unquoted boolean-like strings
+             # Look for: "key": true-ish-string or "key": false-ish-string
+             bool_pattern = r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9_-]*[a-zA-Z0-9])(?=\s*[,}])'
+             repaired = re.sub(bool_pattern, r'\1: "\2"', repaired)
+
+             # Fix 3: Remove trailing commas
+             repaired = re.sub(r',(\s*[}\]])', r'\1', repaired)
+
+             # Fix 4: Ensure proper string quoting for common unquoted values
+             # Handle cases like: "key": value (where value should be "value")
+             unquoted_pattern = r'("[\w]+"):\s*([a-zA-Z0-9_-]+)(?=\s*[,}])'
+             repaired = re.sub(unquoted_pattern, r'\1: "\2"', repaired)
+
+             try:
+                 result = json.loads(repaired)
+                 logger.info(f"Successfully repaired JSON. Original: {json_str[:100]}...")
+                 logger.info(f"Repaired: {repaired[:100]}...")
+                 return result
+             except json.JSONDecodeError as repair_error:
+                 logger.error(f"JSON repair failed. Original: {json_str}")
+                 logger.error(f"Repaired attempt: {repaired}")
+                 raise ValueError(f"Could not parse JSON even after repair attempts. Original error: {original_error}, Repair error: {repair_error}")
+
      async def initialize(self) -> None:
          """Initialize database and other resources."""
          await self.db_manager.initialize()
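
To make the repair behavior concrete, here is a small standalone sketch of three of the repair passes (quote bare URLs, quote bare identifiers, strip trailing commas) applied to an invented malformed payload. The regular expressions are copied from the hunk above; everything else, including the sample input, is illustrative:

    import json
    import re

    broken = '{"projectName": myproject, "remoteOrigin": https://github.com/example/repo.git,}'

    repaired = re.sub(r'("[\w]+"):\s*([a-zA-Z][a-zA-Z0-9+.-]*://[^\s,}]+|/[^\s,}]*)', r'\1: "\2"', broken)
    repaired = re.sub(r'("[\w]+"):\s*([a-zA-Z0-9_-]+)(?=\s*[,}])', r'\1: "\2"', repaired)
    repaired = re.sub(r',(\s*[}\]])', r'\1', repaired)

    print(json.loads(repaired))
    # {'projectName': 'myproject', 'remoteOrigin': 'https://github.com/example/repo.git'}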

@@ -116,11 +214,11 @@ class MCPCodeIndexServer:
                      "description": "Git branch name (e.g., 'main', 'develop')"
                  },
                  "remoteOrigin": {
-                     "type": ["string", "null"],
+                     "type": "string",
                      "description": "Git remote origin URL if available"
                  },
                  "upstreamOrigin": {
-                     "type": ["string", "null"],
+                     "type": "string",
                      "description": "Upstream repository URL if this is a fork"
                  },
                  "filePath": {

@@ -140,11 +238,11 @@ class MCPCodeIndexServer:
                  "projectName": {"type": "string", "description": "The name of the project"},
                  "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
                  "branch": {"type": "string", "description": "Git branch name"},
-                 "remoteOrigin": {"type": ["string", "null"], "description": "Git remote origin URL if available"},
-                 "upstreamOrigin": {"type": ["string", "null"], "description": "Upstream repository URL if this is a fork"},
+                 "remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
+                 "upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
                  "filePath": {"type": "string", "description": "Relative path to the file from project root"},
                  "description": {"type": "string", "description": "Detailed description of the file's contents"},
-                 "fileHash": {"type": ["string", "null"], "description": "SHA-256 hash of the file contents (optional)"}
+                 "fileHash": {"type": "string", "description": "SHA-256 hash of the file contents (optional)"}
              },
              "required": ["projectName", "folderPath", "branch", "filePath", "description"]
          }

@@ -158,8 +256,8 @@ class MCPCodeIndexServer:
                  "projectName": {"type": "string", "description": "The name of the project"},
                  "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
                  "branch": {"type": "string", "description": "Git branch name"},
-                 "remoteOrigin": {"type": ["string", "null"], "description": "Git remote origin URL if available"},
-                 "upstreamOrigin": {"type": ["string", "null"], "description": "Upstream repository URL if this is a fork"},
+                 "remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
+                 "upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
                  "tokenLimit": {"type": "integer", "description": "Optional token limit override (defaults to server configuration)"}
              },
              "required": ["projectName", "folderPath", "branch"]

@@ -174,8 +272,8 @@ class MCPCodeIndexServer:
                  "projectName": {"type": "string", "description": "The name of the project"},
                  "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
                  "branch": {"type": "string", "description": "Git branch name"},
-                 "remoteOrigin": {"type": ["string", "null"], "description": "Git remote origin URL if available"},
-                 "upstreamOrigin": {"type": ["string", "null"], "description": "Upstream repository URL if this is a fork"},
+                 "remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
+                 "upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
                  "limit": {"type": "integer", "description": "Maximum number of missing files to return (optional)"}
              },
              "required": ["projectName", "folderPath", "branch"]

@@ -190,8 +288,8 @@ class MCPCodeIndexServer:
                  "projectName": {"type": "string", "description": "The name of the project"},
                  "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
                  "branch": {"type": "string", "description": "Git branch to search in"},
-                 "remoteOrigin": {"type": ["string", "null"], "description": "Git remote origin URL if available"},
-                 "upstreamOrigin": {"type": ["string", "null"], "description": "Upstream repository URL if this is a fork"},
+                 "remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
+                 "upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
                  "query": {"type": "string", "description": "Search query (e.g., 'authentication middleware', 'database models')"},
                  "maxResults": {"type": "integer", "default": 20, "description": "Maximum number of results to return"}
              },

@@ -207,8 +305,8 @@ class MCPCodeIndexServer:
                  "projectName": {"type": "string", "description": "The name of the project"},
                  "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
                  "branch": {"type": "string", "description": "Git branch name"},
-                 "remoteOrigin": {"type": ["string", "null"], "description": "Git remote origin URL if available"},
-                 "upstreamOrigin": {"type": ["string", "null"], "description": "Upstream repository URL if this is a fork"}
+                 "remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
+                 "upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"}
              },
              "required": ["projectName", "folderPath", "branch"]
          }

@@ -221,8 +319,8 @@ class MCPCodeIndexServer:
              "properties": {
                  "projectName": {"type": "string", "description": "The name of the project"},
                  "folderPath": {"type": "string", "description": "Absolute path to the project folder"},
-                 "remoteOrigin": {"type": ["string", "null"], "description": "Git remote origin URL"},
-                 "upstreamOrigin": {"type": ["string", "null"], "description": "Upstream repository URL if this is a fork"},
+                 "remoteOrigin": {"type": "string", "description": "Git remote origin URL"},
+                 "upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
                  "sourceBranch": {"type": "string", "description": "Branch to merge from (e.g., 'feature/new-ui')"},
                  "targetBranch": {"type": "string", "description": "Branch to merge into (e.g., 'main')"},
                  "conflictResolutions": {

@@ -270,7 +368,10 @@ class MCPCodeIndexServer:

      async def _execute_tool_handler(self, handler, arguments: Dict[str, Any]) -> List[types.TextContent]:
          """Execute a tool handler and format the result."""
-         result = await handler(arguments)
+         # Clean HTML entities from all arguments before processing
+         cleaned_arguments = self._clean_arguments(arguments)
+
+         result = await handler(cleaned_arguments)

          return [types.TextContent(
              type="text",
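
For reference, the argument cleaning wired in above relies on the standard-library html.unescape call (the same call _clean_html_entities uses); the sample values here are invented:

    import html

    print(html.unescape("src/utils.py &amp; tests"))      # src/utils.py & tests
    print(html.unescape("Parses &lt;config&gt; blocks"))  # Parses <config> blocks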

@@ -410,8 +511,8 @@ class MCPCodeIndexServer:
          if not scanner.is_valid_project_directory():
              return False

-         current_files = scanner.scan_files()
-         current_basenames = {Path(f).name for f in current_files}
+         current_files = scanner.scan_directory()
+         current_basenames = {f.name for f in current_files}

          if not current_basenames:
              return False
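
The switch from scan_files() to scan_directory() also changes the element type the set comprehension sees: the new code reads .name directly, which suggests pathlib.Path objects, where the old code wrapped plain path strings in Path() first. A small equivalence sketch (file names invented; the Path return type of scan_directory() is an assumption based on the .name usage above):

    from pathlib import Path

    as_strings = ["src/app.py", "README.md"]
    as_paths = [Path(p) for p in as_strings]

    # Old style: derive basenames from strings via Path(f).name
    old_basenames = {Path(f).name for f in as_strings}
    # New style: elements are assumed to already be Path objects, so .name is direct
    new_basenames = {f.name for f in as_paths}
    assert old_basenames == new_basenames == {"app.py", "README.md"}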

src/mcp_code_indexer.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mcp-code-indexer
- Version: 1.1.3
+ Version: 1.1.5
  Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
  Author: MCP Code Indexer Contributors
  Maintainer: MCP Code Indexer Contributors

src/mcp_code_indexer.egg-info/SOURCES.txt
@@ -11,6 +11,7 @@ docs/contributing.md
  migrations/001_initial.sql
  migrations/002_performance_indexes.sql
  src/mcp_code_indexer/__init__.py
+ src/mcp_code_indexer/__main__.py
  src/mcp_code_indexer/error_handler.py
  src/mcp_code_indexer/file_scanner.py
  src/mcp_code_indexer/logging_config.py

mcp_code_indexer-1.1.3/src/mcp_code_indexer/main.py (removed)
@@ -1,134 +0,0 @@
- #!/usr/bin/env python3
- """
- MCP Code Indexer Package Main Module
-
- Entry point for the mcp-code-indexer package when installed via pip.
- """
-
- import argparse
- import asyncio
- import logging
- import sys
- from pathlib import Path
-
- from . import __version__
- from .logging_config import setup_logging
- from .error_handler import setup_error_handling
-
-
- def parse_arguments() -> argparse.Namespace:
-     """Parse command line arguments."""
-     parser = argparse.ArgumentParser(
-         description="MCP Code Index Server - Track file descriptions across codebases",
-         prog="mcp-code-indexer"
-     )
-
-     parser.add_argument(
-         "--version",
-         action="version",
-         version=f"mcp-code-indexer {__version__}"
-     )
-
-     parser.add_argument(
-         "--token-limit",
-         type=int,
-         default=32000,
-         help="Maximum tokens before recommending search instead of full overview (default: 32000)"
-     )
-
-     parser.add_argument(
-         "--db-path",
-         type=str,
-         default="~/.mcp-code-index/tracker.db",
-         help="Path to SQLite database (default: ~/.mcp-code-index/tracker.db)"
-     )
-
-     parser.add_argument(
-         "--cache-dir",
-         type=str,
-         default="~/.mcp-code-index/cache",
-         help="Directory for caching token counts (default: ~/.mcp-code-index/cache)"
-     )
-
-     parser.add_argument(
-         "--log-level",
-         type=str,
-         choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
-         default="INFO",
-         help="Logging level (default: INFO)"
-     )
-
-     return parser.parse_args()
-
-
- async def main() -> None:
-     """Main entry point for the MCP server."""
-     args = parse_arguments()
-
-     # Setup structured logging
-     log_file = Path(args.cache_dir).expanduser() / "server.log" if args.cache_dir else None
-     logger = setup_logging(
-         log_level=args.log_level,
-         log_file=log_file,
-         enable_file_logging=True
-     )
-
-     # Setup error handling
-     error_handler = setup_error_handling(logger)
-
-     # Expand user paths
-     db_path = Path(args.db_path).expanduser()
-     cache_dir = Path(args.cache_dir).expanduser()
-
-     # Create directories if they don't exist
-     db_path.parent.mkdir(parents=True, exist_ok=True)
-     cache_dir.mkdir(parents=True, exist_ok=True)
-
-     # Log startup information to stderr (stdout reserved for MCP JSON-RPC)
-     logger.info("Starting MCP Code Index Server", extra={
-         "structured_data": {
-             "startup": {
-                 "version": __version__,
-                 "token_limit": args.token_limit,
-                 "db_path": str(db_path),
-                 "cache_dir": str(cache_dir),
-                 "log_level": args.log_level
-             }
-         }
-     })
-
-     try:
-         # Import and run the MCP server
-         from .server.mcp_server import MCPCodeIndexServer
-
-         server = MCPCodeIndexServer(
-             token_limit=args.token_limit,
-             db_path=db_path,
-             cache_dir=cache_dir
-         )
-
-         await server.run()
-
-     except Exception as e:
-         error_handler.log_error(e, context={"phase": "startup"})
-         raise
-
-
- def cli_main():
-     """Console script entry point."""
-     try:
-         asyncio.run(main())
-     except KeyboardInterrupt:
-         # For MCP servers, we should avoid stdout completely
-         # The server will log shutdown through stderr
-         pass
-     except Exception as e:
-         # Log critical errors to stderr, not stdout
-         import traceback
-         print(f"Server failed to start: {e}", file=sys.stderr)
-         print(f"Traceback: {traceback.format_exc()}", file=sys.stderr)
-         sys.exit(1)
-
-
- if __name__ == "__main__":
-     cli_main()