mcp-code-indexer 4.0.1__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. mcp_code_indexer/__init__.py +7 -5
  2. mcp_code_indexer/ask_handler.py +2 -2
  3. mcp_code_indexer/claude_api_handler.py +10 -5
  4. mcp_code_indexer/cleanup_manager.py +20 -12
  5. mcp_code_indexer/commands/makelocal.py +85 -63
  6. mcp_code_indexer/data/stop_words_english.txt +1 -1
  7. mcp_code_indexer/database/connection_health.py +29 -20
  8. mcp_code_indexer/database/database.py +44 -31
  9. mcp_code_indexer/database/database_factory.py +19 -20
  10. mcp_code_indexer/database/exceptions.py +10 -10
  11. mcp_code_indexer/database/models.py +126 -1
  12. mcp_code_indexer/database/path_resolver.py +22 -21
  13. mcp_code_indexer/database/retry_executor.py +37 -19
  14. mcp_code_indexer/deepask_handler.py +3 -3
  15. mcp_code_indexer/error_handler.py +46 -20
  16. mcp_code_indexer/file_scanner.py +15 -12
  17. mcp_code_indexer/git_hook_handler.py +71 -76
  18. mcp_code_indexer/logging_config.py +13 -5
  19. mcp_code_indexer/main.py +85 -22
  20. mcp_code_indexer/middleware/__init__.py +1 -1
  21. mcp_code_indexer/middleware/auth.py +47 -43
  22. mcp_code_indexer/middleware/error_middleware.py +15 -15
  23. mcp_code_indexer/middleware/logging.py +44 -42
  24. mcp_code_indexer/middleware/security.py +84 -76
  25. mcp_code_indexer/migrations/002_performance_indexes.sql +1 -1
  26. mcp_code_indexer/migrations/004_remove_branch_dependency.sql +14 -14
  27. mcp_code_indexer/migrations/006_vector_mode.sql +189 -0
  28. mcp_code_indexer/query_preprocessor.py +2 -2
  29. mcp_code_indexer/server/mcp_server.py +158 -94
  30. mcp_code_indexer/transport/__init__.py +1 -1
  31. mcp_code_indexer/transport/base.py +19 -17
  32. mcp_code_indexer/transport/http_transport.py +89 -76
  33. mcp_code_indexer/transport/stdio_transport.py +12 -8
  34. mcp_code_indexer/vector_mode/__init__.py +36 -0
  35. mcp_code_indexer/vector_mode/chunking/__init__.py +19 -0
  36. mcp_code_indexer/vector_mode/chunking/ast_chunker.py +403 -0
  37. mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +500 -0
  38. mcp_code_indexer/vector_mode/chunking/language_handlers.py +478 -0
  39. mcp_code_indexer/vector_mode/config.py +155 -0
  40. mcp_code_indexer/vector_mode/daemon.py +335 -0
  41. mcp_code_indexer/vector_mode/monitoring/__init__.py +19 -0
  42. mcp_code_indexer/vector_mode/monitoring/change_detector.py +312 -0
  43. mcp_code_indexer/vector_mode/monitoring/file_watcher.py +445 -0
  44. mcp_code_indexer/vector_mode/monitoring/merkle_tree.py +418 -0
  45. mcp_code_indexer/vector_mode/providers/__init__.py +72 -0
  46. mcp_code_indexer/vector_mode/providers/base_provider.py +230 -0
  47. mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +338 -0
  48. mcp_code_indexer/vector_mode/providers/voyage_client.py +212 -0
  49. mcp_code_indexer/vector_mode/security/__init__.py +11 -0
  50. mcp_code_indexer/vector_mode/security/patterns.py +297 -0
  51. mcp_code_indexer/vector_mode/security/redactor.py +368 -0
  52. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/METADATA +82 -24
  53. mcp_code_indexer-4.1.0.dist-info/RECORD +66 -0
  54. mcp_code_indexer-4.0.1.dist-info/RECORD +0 -47
  55. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/LICENSE +0 -0
  56. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/WHEEL +0 -0
  57. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/entry_points.txt +0 -0
@@ -10,16 +10,26 @@ import logging
10
10
  from contextlib import asynccontextmanager
11
11
  from dataclasses import dataclass, field
12
12
  from datetime import datetime, timezone
13
- from typing import Any, AsyncIterator, Callable, Dict, Optional, TypeVar
13
+ from typing import (
14
+ Any,
15
+ AsyncContextManager,
16
+ AsyncIterator,
17
+ Awaitable,
18
+ Callable,
19
+ Dict,
20
+ Optional,
21
+ TypeVar,
22
+ )
14
23
 
15
24
  import aiosqlite
16
25
  from tenacity import (
17
26
  AsyncRetrying,
27
+ RetryCallState,
18
28
  RetryError,
29
+ after_log,
30
+ before_sleep_log,
19
31
  stop_after_attempt,
20
32
  wait_exponential_jitter,
21
- before_sleep_log,
22
- after_log,
23
33
  )
24
34
 
25
35
  logger = logging.getLogger(__name__)
@@ -100,7 +110,7 @@ class RetryExecutor:
100
110
  It replaces the broken async context manager retry pattern.
101
111
  """
102
112
 
103
- def __init__(self, config: Optional[RetryConfig] = None):
113
+ def __init__(self, config: Optional[RetryConfig] = None) -> None:
104
114
  """
105
115
  Initialize retry executor.
106
116
 
@@ -126,7 +136,9 @@ class RetryExecutor:
126
136
  )
127
137
 
128
138
  async def execute_with_retry(
129
- self, operation: Callable[[], T], operation_name: str = "database_operation"
139
+ self,
140
+ operation: Callable[[], Awaitable[T]],
141
+ operation_name: str = "database_operation",
130
142
  ) -> T:
131
143
  """
132
144
  Execute an operation with retry logic.
@@ -198,10 +210,7 @@ class RetryExecutor:
198
210
 
199
211
  original_error = e.last_attempt.exception()
200
212
  logger.error(
201
- (
202
- f"Operation '{operation_name}' failed after "
203
- f"{attempt_count} attempts"
204
- ),
213
+ (f"Operation '{operation_name}' failed after {attempt_count} attempts"),
205
214
  extra={
206
215
  "structured_data": {
207
216
  "retry_exhausted": {
@@ -245,10 +254,14 @@ class RetryExecutor:
245
254
  # Clean up tracking
246
255
  self._operation_start_times.pop(operation_name, None)
247
256
 
257
+ # This should never be reached due to tenacity's retry logic
258
+ # but MyPy requires it for completeness
259
+ raise RuntimeError("Unexpected end of retry logic")
260
+
248
261
  @asynccontextmanager
249
262
  async def get_connection_with_retry(
250
263
  self,
251
- connection_factory: Callable[[], AsyncIterator[aiosqlite.Connection]],
264
+ connection_factory: Callable[[], AsyncContextManager[aiosqlite.Connection]],
252
265
  operation_name: str = "database_connection",
253
266
  ) -> AsyncIterator[aiosqlite.Connection]:
254
267
  """
@@ -267,24 +280,25 @@ class RetryExecutor:
267
280
  Database connection
268
281
  """
269
282
 
270
- async def get_connection():
283
+ async def acquire_connection() -> aiosqlite.Connection:
271
284
  # This function will be retried by execute_with_retry
272
- async with connection_factory() as conn:
273
- # Store connection for the outer context manager
274
- return conn
285
+ # Get the async context manager and enter it
286
+ ctx_manager = connection_factory()
287
+ conn = await ctx_manager.__aenter__()
288
+ return conn
275
289
 
276
290
  # Use execute_with_retry to handle the retry logic
277
291
  # We create a connection and store it for the context manager
278
- connection = await self.execute_with_retry(get_connection, operation_name)
292
+ connection = await self.execute_with_retry(acquire_connection, operation_name)
279
293
 
280
294
  try:
281
295
  yield connection
282
296
  finally:
283
- # Connection cleanup is handled by the original context manager
284
- # in the connection_factory, so nothing to do here
285
- pass
297
+ # Close the connection properly
298
+ if hasattr(connection, "close"):
299
+ await connection.close()
286
300
 
287
- def _should_retry_exception(self, retry_state) -> bool:
301
+ def _should_retry_exception(self, retry_state: RetryCallState) -> bool:
288
302
  """
289
303
  Determine if an exception should trigger a retry.
290
304
 
@@ -303,6 +317,10 @@ class RetryExecutor:
303
317
  if exception is None:
304
318
  return False
305
319
 
320
+ # Only retry if it's an Exception (not BaseException)
321
+ if not isinstance(exception, Exception):
322
+ return False
323
+
306
324
  return self._is_sqlite_retryable_error(exception)
307
325
 
308
326
  def _is_sqlite_retryable_error(self, error: Exception) -> bool:
@@ -9,9 +9,9 @@ Handles enhanced question-answering with two-stage processing:
9
9
 
10
10
  import logging
11
11
  from pathlib import Path
12
- from typing import Dict, List, Optional, Any
12
+ from typing import Any, Dict, List, Optional
13
13
 
14
- from .claude_api_handler import ClaudeAPIHandler, ClaudeAPIError
14
+ from .claude_api_handler import ClaudeAPIError, ClaudeAPIHandler
15
15
  from .database.database import DatabaseManager
16
16
 
17
17
 
@@ -228,7 +228,7 @@ class DeepAskHandler(ClaudeAPIHandler):
228
228
  self.logger.info(f"Search terms: {search_terms}")
229
229
 
230
230
  # Search for relevant files
231
- relevant_files = []
231
+ relevant_files: List[Dict[str, Any]] = []
232
232
  total_files_found = 0
233
233
 
234
234
  try:
@@ -10,7 +10,8 @@ import logging
10
10
  import traceback
11
11
  from datetime import datetime
12
12
  from enum import Enum
13
- from typing import Any, Dict, Optional
13
+ from typing import Any, Dict, Optional, Callable
14
+ from functools import wraps
14
15
 
15
16
  from mcp import types
16
17
 
@@ -146,14 +147,14 @@ class ErrorHandler:
146
147
  error_data["tool_name"] = tool_name
147
148
 
148
149
  if context:
149
- error_data["context"] = context
150
+ error_data["context"] = str(context)
150
151
 
151
152
  if isinstance(error, MCPError):
152
153
  error_data.update(
153
154
  {
154
155
  "category": error.category.value,
155
- "code": error.code,
156
- "details": error.details,
156
+ "code": str(error.code),
157
+ "details": str(error.details),
157
158
  }
158
159
  )
159
160
 
@@ -243,11 +244,19 @@ class ErrorHandler:
243
244
  if task.done() and not task.cancelled():
244
245
  exception = task.exception()
245
246
  if exception:
246
- self.log_error(
247
- exception,
248
- context={**(context or {}), "task_name": task_name},
249
- tool_name="async_task",
250
- )
247
+ # Convert BaseException to Exception for log_error
248
+ if isinstance(exception, Exception):
249
+ self.log_error(
250
+ exception,
251
+ context={**(context or {}), "task_name": task_name},
252
+ tool_name="async_task",
253
+ )
254
+ else:
255
+ self.log_error(
256
+ Exception(str(exception)),
257
+ context={**(context or {}), "task_name": task_name},
258
+ tool_name="async_task",
259
+ )
251
260
  except Exception as e:
252
261
  self.logger.error(f"Error handling task error for {task_name}: {e}")
253
262
 
@@ -258,6 +267,7 @@ class StructuredFormatter(logging.Formatter):
258
267
  def format(self, record: logging.LogRecord) -> str:
259
268
  """Format log record as structured JSON."""
260
269
  import json
270
+
261
271
  from . import __version__
262
272
 
263
273
  log_data = {
@@ -295,12 +305,21 @@ def setup_error_handling(logger: logging.Logger) -> ErrorHandler:
295
305
  error_handler = ErrorHandler(logger)
296
306
 
297
307
  # Set up asyncio exception handler
298
- def asyncio_exception_handler(loop, context):
308
+ def asyncio_exception_handler(
309
+ loop: asyncio.AbstractEventLoop, context: Dict[str, Any]
310
+ ) -> None:
299
311
  exception = context.get("exception")
300
312
  if exception:
301
- error_handler.log_error(
302
- exception, context={"asyncio_context": context, "loop": str(loop)}
303
- )
313
+ # Convert BaseException to Exception for log_error
314
+ if isinstance(exception, Exception):
315
+ error_handler.log_error(
316
+ exception, context={"asyncio_context": context, "loop": str(loop)}
317
+ )
318
+ else:
319
+ error_handler.log_error(
320
+ Exception(str(exception)),
321
+ context={"asyncio_context": context, "loop": str(loop)},
322
+ )
304
323
  else:
305
324
  logger.error(f"Asyncio error: {context}")
306
325
 
@@ -318,10 +337,11 @@ def setup_error_handling(logger: logging.Logger) -> ErrorHandler:
318
337
  # Decorators for common error handling patterns
319
338
 
320
339
 
321
- def handle_database_errors(func):
340
+ def handle_database_errors(func: Callable) -> Callable:
322
341
  """Decorator to handle database errors."""
323
342
 
324
- async def wrapper(*args, **kwargs):
343
+ @wraps(func)
344
+ async def wrapper(*args: Any, **kwargs: Any) -> Any:
325
345
  try:
326
346
  return await func(*args, **kwargs)
327
347
  except Exception as e:
@@ -332,10 +352,11 @@ def handle_database_errors(func):
332
352
  return wrapper
333
353
 
334
354
 
335
- def handle_file_errors(func):
355
+ def handle_file_errors(func: Callable) -> Callable:
336
356
  """Decorator to handle file system errors."""
337
357
 
338
- async def wrapper(*args, **kwargs):
358
+ @wraps(func)
359
+ async def wrapper(*args: Any, **kwargs: Any) -> Any:
339
360
  try:
340
361
  return await func(*args, **kwargs)
341
362
  except (FileNotFoundError, PermissionError, OSError) as e:
@@ -346,11 +367,16 @@ def handle_file_errors(func):
346
367
  return wrapper
347
368
 
348
369
 
349
- def validate_arguments(required_fields: list, optional_fields: list = None):
370
+ def validate_arguments(
371
+ required_fields: list, optional_fields: Optional[list] = None
372
+ ) -> Callable:
350
373
  """Decorator to validate tool arguments."""
351
374
 
352
- def decorator(func):
353
- async def wrapper(self, arguments: Dict[str, Any], *args, **kwargs):
375
+ def decorator(func: Callable) -> Callable:
376
+ @wraps(func)
377
+ async def wrapper(
378
+ self: Any, arguments: Dict[str, Any], *args: Any, **kwargs: Any
379
+ ) -> Any:
354
380
  # Check required fields
355
381
  missing_fields = [
356
382
  field for field in required_fields if field not in arguments
@@ -6,10 +6,10 @@ while respecting .gitignore patterns and common ignore patterns. It enables
6
6
  efficient discovery of files that need description tracking.
7
7
  """
8
8
 
9
+ import fnmatch
9
10
  import logging
10
11
  from pathlib import Path
11
- from typing import List, Set, Optional, Generator
12
- import fnmatch
12
+ from typing import Dict, Generator, List, Optional, Set, Union, Any, cast
13
13
 
14
14
  try:
15
15
  from gitignore_parser import parse_gitignore
@@ -148,7 +148,7 @@ class FileScanner:
148
148
  project_root: Root directory of the project to scan
149
149
  """
150
150
  self.project_root = Path(project_root).resolve()
151
- self._gitignore_cache: dict = {}
151
+ self._gitignore_cache: Dict[str, Any] = {}
152
152
  self._load_gitignore_patterns()
153
153
 
154
154
  def _load_gitignore_patterns(self) -> None:
@@ -348,19 +348,19 @@ class FileScanner:
348
348
  return (
349
349
  self.project_root.exists()
350
350
  and self.project_root.is_dir()
351
- and self.project_root.stat().st_mode & 0o444 # Readable
351
+ and bool(self.project_root.stat().st_mode & 0o444) # Readable
352
352
  )
353
353
  except (OSError, PermissionError):
354
354
  return False
355
355
 
356
- def get_project_stats(self) -> dict:
356
+ def get_project_stats(self) -> Dict[str, Union[int, Dict[str, int]]]:
357
357
  """
358
358
  Get statistics about the project directory.
359
359
 
360
360
  Returns:
361
361
  Dictionary with project statistics for trackable files only
362
362
  """
363
- stats = {
363
+ stats: Dict[str, Union[int, Dict[str, int]]] = {
364
364
  "total_files": 0,
365
365
  "trackable_files": 0,
366
366
  "ignored_files": 0,
@@ -375,24 +375,27 @@ class FileScanner:
375
375
 
376
376
  # Check if trackable first
377
377
  if self.should_ignore_file(file_path):
378
- stats["ignored_files"] += 1
378
+ ignored_files = cast(int, stats["ignored_files"])
379
+ stats["ignored_files"] = ignored_files + 1
379
380
  continue
380
381
 
381
382
  # Only process trackable files for detailed stats
382
- stats["trackable_files"] += 1
383
+ trackable_files = cast(int, stats["trackable_files"])
384
+ stats["trackable_files"] = trackable_files + 1
383
385
 
384
386
  # Track file size
385
387
  try:
386
388
  file_size = file_path.stat().st_size
387
- stats["largest_file_size"] = max(
388
- stats["largest_file_size"], file_size
389
- )
389
+ largest_file_size = cast(int, stats["largest_file_size"])
390
+ stats["largest_file_size"] = max(largest_file_size, file_size)
390
391
  except OSError:
391
392
  pass
392
393
 
393
394
  # Track extensions for trackable files only
394
395
  ext = file_path.suffix.lower()
395
- stats["file_extensions"][ext] = stats["file_extensions"].get(ext, 0) + 1
396
+ file_extensions = stats["file_extensions"]
397
+ if isinstance(file_extensions, dict):
398
+ file_extensions[ext] = file_extensions.get(ext, 0) + 1
396
399
 
397
400
  # Total files is just trackable files
398
401
  stats["total_files"] = stats["trackable_files"]