mcp-code-indexer 1.9.1__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,12 +13,20 @@ from datetime import datetime, timedelta
13
13
  from pathlib import Path
14
14
  from typing import List, Optional, Dict, Any, Tuple, AsyncIterator
15
15
 
16
+ import asyncio
17
+ import random
16
18
  import aiosqlite
17
19
 
18
20
  from mcp_code_indexer.database.models import (
19
21
  Project, FileDescription, MergeConflict, SearchResult,
20
22
  CodebaseSizeInfo, ProjectOverview, WordFrequencyResult, WordFrequencyTerm
21
23
  )
24
+ from mcp_code_indexer.database.retry_handler import (
25
+ RetryHandler, ConnectionRecoveryManager, create_retry_handler
26
+ )
27
+ from mcp_code_indexer.database.connection_health import (
28
+ ConnectionHealthMonitor, DatabaseMetricsCollector
29
+ )
22
30
 
23
31
  logger = logging.getLogger(__name__)
24
32
 
@@ -31,19 +39,52 @@ class DatabaseManager:
31
39
  and caching with proper transaction management and error handling.
32
40
  """
33
41
 
34
    def __init__(self,
                 db_path: Path,
                 pool_size: int = 3,
                 retry_count: int = 5,
                 timeout: float = 10.0,
                 enable_wal_mode: bool = True,
                 health_check_interval: float = 30.0):
        """Initialize database manager with path to SQLite database.

        Args:
            db_path: Filesystem path to the SQLite database file.
            pool_size: Maximum number of pooled connections.
            retry_count: Maximum attempts passed to the retry handler
                (RetryConfig.max_attempts).
            timeout: Seconds passed to the health monitor as its per-check
                timeout; presumably also the intended general DB wait budget
                -- confirm against ConnectionHealthMonitor.
            enable_wal_mode: Whether to enable SQLite WAL journal mode during
                initialize().
            health_check_interval: Seconds between background health checks.
        """
        self.db_path = db_path
        self.pool_size = pool_size
        self.retry_count = retry_count
        self.timeout = timeout
        self.enable_wal_mode = enable_wal_mode
        self.health_check_interval = health_check_interval
        self._connection_pool: List[aiosqlite.Connection] = []
        self._pool_lock = None  # Will be initialized in async context
        self._write_lock = None  # Write serialization lock, initialized in async context

        # Retry and recovery components - configure with provided settings.
        # RetryConfig is imported locally because only this constructor needs it.
        from .retry_handler import RetryConfig
        retry_config = RetryConfig(max_attempts=retry_count)
        self._retry_handler = create_retry_handler(retry_config)
        self._recovery_manager = None  # Initialized in async context

        # Health monitoring and metrics
        self._health_monitor = None  # Initialized in async context
        self._metrics_collector = DatabaseMetricsCollector()
 
41
70
  async def initialize(self) -> None:
42
71
  """Initialize database schema and configuration."""
43
72
  import asyncio
44
73
 
45
- # Initialize pool lock
74
+ # Initialize locks
46
75
  self._pool_lock = asyncio.Lock()
76
+ self._write_lock = asyncio.Lock()
77
+
78
+ # Initialize connection recovery manager
79
+ self._recovery_manager = ConnectionRecoveryManager(self)
80
+
81
+ # Initialize health monitoring with configured interval
82
+ self._health_monitor = ConnectionHealthMonitor(
83
+ self,
84
+ check_interval=self.health_check_interval,
85
+ timeout_seconds=self.timeout
86
+ )
87
+ await self._health_monitor.start_monitoring()
47
88
 
48
89
  # Ensure database directory exists
49
90
  self.db_path.parent.mkdir(parents=True, exist_ok=True)
@@ -56,6 +97,9 @@ class DatabaseManager:
56
97
  # Enable row factory for easier data access
57
98
  db.row_factory = aiosqlite.Row
58
99
 
100
+ # Configure WAL mode and optimizations for concurrent access
101
+ await self._configure_database_optimizations(db, include_wal_mode=self.enable_wal_mode)
102
+
59
103
  # Apply each migration
60
104
  for migration_file in migration_files:
61
105
  logger.info(f"Applying migration: {migration_file.name}")
@@ -67,6 +111,48 @@ class DatabaseManager:
67
111
 
68
112
  logger.info(f"Database initialized at {self.db_path} with {len(migration_files)} migrations")
69
113
 
114
+ async def _configure_database_optimizations(self, db: aiosqlite.Connection, include_wal_mode: bool = True) -> None:
115
+ """
116
+ Configure SQLite optimizations for concurrent access and performance.
117
+
118
+ Args:
119
+ db: Database connection to configure
120
+ include_wal_mode: Whether to set WAL mode (only needed once per database)
121
+ """
122
+ optimizations = []
123
+
124
+ # WAL mode is database-level, only set during initialization
125
+ if include_wal_mode:
126
+ optimizations.append("PRAGMA journal_mode = WAL")
127
+ logger.info("Enabling WAL mode for database concurrency")
128
+
129
+ # Connection-level optimizations that can be set per connection
130
+ optimizations.extend([
131
+ "PRAGMA synchronous = NORMAL", # Balance durability/performance
132
+ "PRAGMA cache_size = -64000", # 64MB cache
133
+ "PRAGMA temp_store = MEMORY", # Use memory for temp tables
134
+ "PRAGMA mmap_size = 268435456", # 256MB memory mapping
135
+ "PRAGMA busy_timeout = 10000", # 10 second timeout (reduced from 30s)
136
+ "PRAGMA optimize" # Enable query planner optimizations
137
+ ])
138
+
139
+ # WAL-specific settings (only if WAL mode is being set)
140
+ if include_wal_mode:
141
+ optimizations.append("PRAGMA wal_autocheckpoint = 1000") # Checkpoint after 1000 pages
142
+
143
+ for pragma in optimizations:
144
+ try:
145
+ await db.execute(pragma)
146
+ logger.debug(f"Applied optimization: {pragma}")
147
+ except Exception as e:
148
+ logger.warning(f"Failed to apply optimization '{pragma}': {e}")
149
+
150
+ await db.commit()
151
+ if include_wal_mode:
152
+ logger.info("Database optimizations configured for concurrent access with WAL mode")
153
+ else:
154
+ logger.debug("Connection optimizations applied")
155
+
70
156
  @asynccontextmanager
71
157
  async def get_connection(self) -> AsyncIterator[aiosqlite.Connection]:
72
158
  """Get a database connection from pool or create new one."""
@@ -83,11 +169,8 @@ class DatabaseManager:
83
169
  conn = await aiosqlite.connect(self.db_path)
84
170
  conn.row_factory = aiosqlite.Row
85
171
 
86
- # Apply performance settings to new connections
87
- await conn.execute("PRAGMA busy_timeout = 30000") # 30 second timeout
88
- await conn.execute("PRAGMA synchronous = NORMAL") # Balanced durability/performance
89
- await conn.execute("PRAGMA cache_size = -64000") # 64MB cache
90
- await conn.execute("PRAGMA temp_store = MEMORY") # Use memory for temp tables
172
+ # Apply connection-level optimizations (WAL mode already set during initialization)
173
+ await self._configure_database_optimizations(conn, include_wal_mode=False)
91
174
 
92
175
  try:
93
176
  yield conn
@@ -104,18 +187,259 @@ class DatabaseManager:
104
187
  await conn.close()
105
188
 
106
189
  async def close_pool(self) -> None:
107
- """Close all connections in the pool."""
190
+ """Close all connections in the pool and stop monitoring."""
191
+ # Stop health monitoring
192
+ if self._health_monitor:
193
+ await self._health_monitor.stop_monitoring()
194
+
195
+ # Close connections
108
196
  if self._pool_lock:
109
197
  async with self._pool_lock:
110
198
  for conn in self._connection_pool:
111
199
  await conn.close()
112
200
  self._connection_pool.clear()
113
201
 
202
+ @asynccontextmanager
203
+ async def get_write_connection(self) -> AsyncIterator[aiosqlite.Connection]:
204
+ """
205
+ Get a database connection with write serialization.
206
+
207
+ This ensures only one write operation occurs at a time across the entire
208
+ application, preventing database locking issues in multi-client scenarios.
209
+ """
210
+ if self._write_lock is None:
211
+ raise RuntimeError("DatabaseManager not initialized - call initialize() first")
212
+
213
+ async with self._write_lock:
214
+ async with self.get_connection() as conn:
215
+ yield conn
216
+
217
    @asynccontextmanager
    async def get_write_connection_with_retry(self, operation_name: str = "write_operation") -> AsyncIterator[aiosqlite.Connection]:
        """
        Get a database connection with write serialization and automatic retry logic.

        This combines write serialization with retry handling for maximum resilience
        against database locking issues.

        Args:
            operation_name: Name of the operation for logging and monitoring

        Raises:
            RuntimeError: If initialize() has not been called yet.
        """
        if self._write_lock is None or self._retry_handler is None:
            raise RuntimeError("DatabaseManager not initialized - call initialize() first")

        # NOTE(review): the retry context wraps a generator `yield`. An async
        # generator cannot yield twice per iteration, so if
        # RetryHandler.with_retry re-executes its body after a failure this
        # would raise -- confirm with_retry does not re-enter around the yield.
        async with self._retry_handler.with_retry(operation_name):
            try:
                async with self._write_lock:
                    async with self.get_connection() as conn:
                        yield conn

                # Reset failure count on success
                if self._recovery_manager:
                    self._recovery_manager.reset_failure_count()

            except Exception as e:
                # Handle persistent failures, then propagate to the caller
                # (and to the retry handler's context).
                if self._recovery_manager:
                    await self._recovery_manager.handle_persistent_failure(operation_name, e)
                raise
246
+
247
+ def get_database_stats(self) -> Dict[str, Any]:
248
+ """
249
+ Get database performance and reliability statistics.
250
+
251
+ Returns:
252
+ Dictionary with retry stats, recovery stats, health status, and metrics
253
+ """
254
+ stats = {
255
+ "connection_pool": {
256
+ "configured_size": self.pool_size,
257
+ "current_size": len(self._connection_pool)
258
+ }
259
+ }
260
+
261
+ if self._retry_handler:
262
+ stats["retry_stats"] = self._retry_handler.get_retry_stats()
263
+
264
+ if self._recovery_manager:
265
+ stats["recovery_stats"] = self._recovery_manager.get_recovery_stats()
266
+
267
+ if self._health_monitor:
268
+ stats["health_status"] = self._health_monitor.get_health_status()
269
+
270
+ if self._metrics_collector:
271
+ stats["operation_metrics"] = self._metrics_collector.get_operation_metrics()
272
+ stats["locking_frequency"] = self._metrics_collector.get_locking_frequency()
273
+
274
+ return stats
275
+
276
+ async def check_health(self) -> Dict[str, Any]:
277
+ """
278
+ Perform an immediate health check and return detailed status.
279
+
280
+ Returns:
281
+ Dictionary with health check result and current metrics
282
+ """
283
+ if not self._health_monitor:
284
+ return {"error": "Health monitoring not initialized"}
285
+
286
+ # Perform immediate health check
287
+ health_result = await self._health_monitor.check_health()
288
+
289
+ return {
290
+ "health_check": {
291
+ "is_healthy": health_result.is_healthy,
292
+ "response_time_ms": health_result.response_time_ms,
293
+ "error_message": health_result.error_message,
294
+ "timestamp": health_result.timestamp.isoformat()
295
+ },
296
+ "overall_status": self._health_monitor.get_health_status(),
297
+ "recent_history": self._health_monitor.get_recent_history()
298
+ }
299
+
300
    @asynccontextmanager
    async def get_immediate_transaction(
        self,
        operation_name: str = "immediate_transaction",
        timeout_seconds: float = 10.0
    ) -> AsyncIterator[aiosqlite.Connection]:
        """
        Get a database connection with BEGIN IMMEDIATE transaction and timeout.

        This ensures write locks are acquired immediately, preventing lock escalation
        failures that can occur with DEFERRED transactions.

        Args:
            operation_name: Name of the operation for monitoring
            timeout_seconds: Transaction timeout in seconds

        Raises:
            asyncio.TimeoutError: If BEGIN/body/commit exceed timeout_seconds
                (the transaction is rolled back first).
        """
        async with self.get_write_connection_with_retry(operation_name) as conn:
            try:
                # Start immediate transaction with timeout.
                # NOTE(review): asyncio.timeout() exists only on Python 3.11+
                # -- confirm the package's minimum supported Python version.
                async with asyncio.timeout(timeout_seconds):
                    await conn.execute("BEGIN IMMEDIATE")
                    yield conn
                    await conn.commit()
            except asyncio.TimeoutError:
                logger.warning(
                    f"Transaction timeout after {timeout_seconds}s for {operation_name}",
                    extra={
                        "structured_data": {
                            "transaction_timeout": {
                                "operation": operation_name,
                                "timeout_seconds": timeout_seconds
                            }
                        }
                    }
                )
                # Roll back the half-open transaction before propagating.
                await conn.rollback()
                raise
            except Exception as e:
                logger.error(f"Transaction failed for {operation_name}: {e}")
                await conn.rollback()
                raise
341
+
342
+ async def execute_transaction_with_retry(
343
+ self,
344
+ operation_func,
345
+ operation_name: str = "transaction_operation",
346
+ max_retries: int = 3,
347
+ timeout_seconds: float = 10.0
348
+ ) -> Any:
349
+ """
350
+ Execute a database operation within a transaction with automatic retry.
351
+
352
+ Args:
353
+ operation_func: Async function that takes a connection and performs the operation
354
+ operation_name: Name of the operation for logging
355
+ max_retries: Maximum retry attempts
356
+ timeout_seconds: Transaction timeout in seconds
357
+
358
+ Returns:
359
+ Result from operation_func
360
+
361
+ Example:
362
+ async def my_operation(conn):
363
+ await conn.execute("INSERT INTO ...", (...))
364
+ return "success"
365
+
366
+ result = await db.execute_transaction_with_retry(my_operation, "insert_data")
367
+ """
368
+ last_error = None
369
+
370
+ for attempt in range(1, max_retries + 1):
371
+ try:
372
+ async with self.get_immediate_transaction(operation_name, timeout_seconds) as conn:
373
+ result = await operation_func(conn)
374
+
375
+ # Record successful operation metrics
376
+ if self._metrics_collector:
377
+ self._metrics_collector.record_operation(
378
+ operation_name,
379
+ timeout_seconds * 1000, # Convert to ms
380
+ True,
381
+ len(self._connection_pool)
382
+ )
383
+
384
+ return result
385
+
386
+ except (aiosqlite.OperationalError, asyncio.TimeoutError) as e:
387
+ last_error = e
388
+
389
+ # Record locking event for metrics
390
+ if self._metrics_collector and "locked" in str(e).lower():
391
+ self._metrics_collector.record_locking_event(operation_name, str(e))
392
+
393
+ if attempt < max_retries:
394
+ # Exponential backoff with jitter
395
+ delay = 0.1 * (2 ** (attempt - 1))
396
+ jitter = delay * 0.1 * (2 * random.random() - 1) # ±10% jitter
397
+ wait_time = max(0.05, delay + jitter)
398
+
399
+ logger.warning(
400
+ f"Transaction attempt {attempt} failed for {operation_name}, retrying in {wait_time:.2f}s: {e}",
401
+ extra={
402
+ "structured_data": {
403
+ "transaction_retry": {
404
+ "operation": operation_name,
405
+ "attempt": attempt,
406
+ "delay_seconds": wait_time,
407
+ "error": str(e)
408
+ }
409
+ }
410
+ }
411
+ )
412
+ await asyncio.sleep(wait_time)
413
+ else:
414
+ # Record failed operation metrics
415
+ if self._metrics_collector:
416
+ self._metrics_collector.record_operation(
417
+ operation_name,
418
+ timeout_seconds * 1000,
419
+ False,
420
+ len(self._connection_pool)
421
+ )
422
+
423
+ logger.error(
424
+ f"Transaction failed after {max_retries} attempts for {operation_name}: {e}",
425
+ extra={
426
+ "structured_data": {
427
+ "transaction_failure": {
428
+ "operation": operation_name,
429
+ "max_retries": max_retries,
430
+ "final_error": str(e)
431
+ }
432
+ }
433
+ }
434
+ )
435
+
436
+ raise last_error
437
+
114
438
  # Project operations
115
439
 
116
440
  async def create_project(self, project: Project) -> None:
117
441
  """Create a new project record."""
118
- async with self.get_connection() as db:
442
+ async with self.get_write_connection_with_retry("create_project") as db:
119
443
  await db.execute(
120
444
  """
121
445
  INSERT INTO projects (id, name, remote_origin, upstream_origin, aliases, created, last_accessed)
@@ -302,7 +626,7 @@ class DatabaseManager:
302
626
 
303
627
  async def update_project_access_time(self, project_id: str) -> None:
304
628
  """Update the last accessed time for a project."""
305
- async with self.get_connection() as db:
629
+ async with self.get_write_connection_with_retry("update_project_access_time") as db:
306
630
  await db.execute(
307
631
  "UPDATE projects SET last_accessed = ? WHERE id = ?",
308
632
  (datetime.utcnow(), project_id)
@@ -311,7 +635,7 @@ class DatabaseManager:
311
635
 
312
636
  async def update_project(self, project: Project) -> None:
313
637
  """Update an existing project record."""
314
- async with self.get_connection() as db:
638
+ async with self.get_write_connection_with_retry("update_project") as db:
315
639
  await db.execute(
316
640
  """
317
641
  UPDATE projects
@@ -373,7 +697,7 @@ class DatabaseManager:
373
697
 
374
698
  async def create_file_description(self, file_desc: FileDescription) -> None:
375
699
  """Create or update a file description."""
376
- async with self.get_connection() as db:
700
+ async with self.get_write_connection_with_retry("create_file_description") as db:
377
701
  await db.execute(
378
702
  """
379
703
  INSERT OR REPLACE INTO file_descriptions
@@ -456,11 +780,11 @@ class DatabaseManager:
456
780
  ]
457
781
 
458
782
  async def batch_create_file_descriptions(self, file_descriptions: List[FileDescription]) -> None:
459
- """Batch create multiple file descriptions efficiently."""
783
+ """Batch create multiple file descriptions efficiently with optimized transactions."""
460
784
  if not file_descriptions:
461
785
  return
462
-
463
- async with self.get_connection() as db:
786
+
787
+ async def batch_operation(conn: aiosqlite.Connection) -> None:
464
788
  data = [
465
789
  (
466
790
  fd.project_id,
@@ -475,7 +799,7 @@ class DatabaseManager:
475
799
  for fd in file_descriptions
476
800
  ]
477
801
 
478
- await db.executemany(
802
+ await conn.executemany(
479
803
  """
480
804
  INSERT OR REPLACE INTO file_descriptions
481
805
  (project_id, branch, file_path, description, file_hash, last_modified, version, source_project_id)
@@ -483,8 +807,13 @@ class DatabaseManager:
483
807
  """,
484
808
  data
485
809
  )
486
- await db.commit()
487
810
  logger.debug(f"Batch created {len(file_descriptions)} file descriptions")
811
+
812
+ await self.execute_transaction_with_retry(
813
+ batch_operation,
814
+ f"batch_create_file_descriptions_{len(file_descriptions)}_files",
815
+ timeout_seconds=30.0 # Longer timeout for batch operations
816
+ )
488
817
 
489
818
  # Search operations
490
819
 
@@ -552,7 +881,7 @@ class DatabaseManager:
552
881
  """Cache token count with TTL."""
553
882
  expires = datetime.utcnow() + timedelta(hours=ttl_hours)
554
883
 
555
- async with self.get_connection() as db:
884
+ async with self.get_write_connection() as db:
556
885
  await db.execute(
557
886
  """
558
887
  INSERT OR REPLACE INTO token_cache (cache_key, token_count, expires)
@@ -564,7 +893,7 @@ class DatabaseManager:
564
893
 
565
894
  async def cleanup_expired_cache(self) -> None:
566
895
  """Remove expired cache entries."""
567
- async with self.get_connection() as db:
896
+ async with self.get_write_connection() as db:
568
897
  await db.execute(
569
898
  "DELETE FROM token_cache WHERE expires < ?",
570
899
  (datetime.utcnow(),)
@@ -663,7 +992,7 @@ class DatabaseManager:
663
992
 
664
993
  async def create_project_overview(self, overview: ProjectOverview) -> None:
665
994
  """Create or update a project overview."""
666
- async with self.get_connection() as db:
995
+ async with self.get_write_connection() as db:
667
996
  await db.execute(
668
997
  """
669
998
  INSERT OR REPLACE INTO project_overviews
@@ -716,9 +1045,9 @@ class DatabaseManager:
716
1045
  """
717
1046
  removed_files = []
718
1047
 
719
- async with self.get_connection() as db:
1048
+ async def cleanup_operation(conn: aiosqlite.Connection) -> List[str]:
720
1049
  # Get all file descriptions for this project/branch
721
- cursor = await db.execute(
1050
+ cursor = await conn.execute(
722
1051
  "SELECT file_path FROM file_descriptions WHERE project_id = ? AND branch = ?",
723
1052
  (project_id, branch)
724
1053
  )
@@ -733,16 +1062,22 @@ class DatabaseManager:
733
1062
 
734
1063
  if not full_path.exists():
735
1064
  to_remove.append(file_path)
736
- removed_files.append(file_path)
737
1065
 
738
1066
  # Remove descriptions for missing files
739
1067
  if to_remove:
740
- await db.executemany(
1068
+ await conn.executemany(
741
1069
  "DELETE FROM file_descriptions WHERE project_id = ? AND branch = ? AND file_path = ?",
742
1070
  [(project_id, branch, path) for path in to_remove]
743
1071
  )
744
- await db.commit()
745
1072
  logger.info(f"Cleaned up {len(to_remove)} missing files from {project_id}/{branch}")
1073
+
1074
+ return to_remove
1075
+
1076
+ removed_files = await self.execute_transaction_with_retry(
1077
+ cleanup_operation,
1078
+ f"cleanup_missing_files_{project_id}_{branch}",
1079
+ timeout_seconds=60.0 # Longer timeout for file system operations
1080
+ )
746
1081
 
747
1082
  return removed_files
748
1083
 
@@ -820,7 +1155,7 @@ class DatabaseManager:
820
1155
  Returns:
821
1156
  Number of projects removed
822
1157
  """
823
- async with self.get_connection() as db:
1158
+ async with self.get_write_connection() as db:
824
1159
  # Find projects with no descriptions and no overview
825
1160
  cursor = await db.execute("""
826
1161
  SELECT p.id, p.name