mcp-code-indexer 1.9.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

@@ -13,12 +13,20 @@ from datetime import datetime, timedelta
 from pathlib import Path
 from typing import List, Optional, Dict, Any, Tuple, AsyncIterator
 
+import asyncio
+import random
 import aiosqlite
 
 from mcp_code_indexer.database.models import (
     Project, FileDescription, MergeConflict, SearchResult,
     CodebaseSizeInfo, ProjectOverview, WordFrequencyResult, WordFrequencyTerm
 )
+from mcp_code_indexer.database.retry_handler import (
+    RetryHandler, ConnectionRecoveryManager, create_retry_handler
+)
+from mcp_code_indexer.database.connection_health import (
+    ConnectionHealthMonitor, DatabaseMetricsCollector
+)
 
 logger = logging.getLogger(__name__)
 
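The two new imports pull in the resilience layer added in 2.0.0. Their implementations are not part of this diff, but the call sites in later hunks imply roughly the following surface. This is a sketch inferred from usage, with assumed signatures, not the shipped code; ConnectionHealthMonitor (start_monitoring, stop_monitoring, check_health, get_health_status, get_recent_history) and DatabaseMetricsCollector (record_operation, record_locking_event, get_operation_metrics, get_locking_frequency) follow the same shape.

    # Inferred sketch of the retry-handler surface; every signature here is an assumption.
    from contextlib import asynccontextmanager

    class RetryHandler:
        @asynccontextmanager
        async def with_retry(self, operation_name: str):
            yield  # the real version retries the wrapped block on lock errors

        def get_retry_stats(self) -> dict:
            return {}

    def create_retry_handler() -> RetryHandler:
        return RetryHandler()

    class ConnectionRecoveryManager:
        def __init__(self, db_manager) -> None: ...
        def reset_failure_count(self) -> None: ...
        async def handle_persistent_failure(self, operation: str, error: Exception) -> None: ...
        def get_recovery_stats(self) -> dict: ...
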
@@ -31,19 +39,36 @@ class DatabaseManager:
     and caching with proper transaction management and error handling.
     """
 
-    def __init__(self, db_path: Path, pool_size: int = 5):
+    def __init__(self, db_path: Path, pool_size: int = 3):
         """Initialize database manager with path to SQLite database."""
         self.db_path = db_path
         self.pool_size = pool_size
         self._connection_pool: List[aiosqlite.Connection] = []
         self._pool_lock = None  # Will be initialized in async context
+        self._write_lock = None  # Write serialization lock, initialized in async context
+
+        # Retry and recovery components
+        self._retry_handler = create_retry_handler()
+        self._recovery_manager = None  # Initialized in async context
+
+        # Health monitoring and metrics
+        self._health_monitor = None  # Initialized in async context
+        self._metrics_collector = DatabaseMetricsCollector()
 
     async def initialize(self) -> None:
         """Initialize database schema and configuration."""
         import asyncio
 
-        # Initialize pool lock
+        # Initialize locks
         self._pool_lock = asyncio.Lock()
+        self._write_lock = asyncio.Lock()
+
+        # Initialize connection recovery manager
+        self._recovery_manager = ConnectionRecoveryManager(self)
+
+        # Initialize health monitoring
+        self._health_monitor = ConnectionHealthMonitor(self)
+        await self._health_monitor.start_monitoring()
 
         # Ensure database directory exists
         self.db_path.parent.mkdir(parents=True, exist_ok=True)
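The manager now has a strict two-phase lifecycle: the write-serialized helpers added below raise RuntimeError unless initialize() has run. A minimal lifecycle sketch; the module path in the import is assumed, the rest matches methods shown in this diff:

    import asyncio
    from pathlib import Path

    from mcp_code_indexer.database.database import DatabaseManager  # module path assumed

    async def main() -> None:
        db = DatabaseManager(Path("tracker.db"), pool_size=3)
        await db.initialize()  # creates locks, recovery manager, health monitor
        try:
            pass  # use db.get_connection() / db.get_write_connection() here
        finally:
            await db.close_pool()  # stops monitoring and closes pooled connections

    asyncio.run(main())
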
@@ -56,6 +81,9 @@ class DatabaseManager:
             # Enable row factory for easier data access
             db.row_factory = aiosqlite.Row
 
+            # Configure WAL mode and optimizations for concurrent access
+            await self._configure_database_optimizations(db)
+
             # Apply each migration
             for migration_file in migration_files:
                 logger.info(f"Applying migration: {migration_file.name}")
@@ -67,6 +95,47 @@ class DatabaseManager:
 
         logger.info(f"Database initialized at {self.db_path} with {len(migration_files)} migrations")
 
+    async def _configure_database_optimizations(self, db: aiosqlite.Connection, include_wal_mode: bool = True) -> None:
+        """
+        Configure SQLite optimizations for concurrent access and performance.
+
+        Args:
+            db: Database connection to configure
+            include_wal_mode: Whether to set WAL mode (only needed once per database)
+        """
+        optimizations = []
+
+        # WAL mode is database-level, only set during initialization
+        if include_wal_mode:
+            optimizations.append("PRAGMA journal_mode = WAL")
+
+        # Connection-level optimizations that can be set per connection
+        optimizations.extend([
+            "PRAGMA synchronous = NORMAL",   # Balance durability/performance
+            "PRAGMA cache_size = -64000",    # 64MB cache
+            "PRAGMA temp_store = MEMORY",    # Use memory for temp tables
+            "PRAGMA mmap_size = 268435456",  # 256MB memory mapping
+            "PRAGMA busy_timeout = 10000",   # 10 second timeout (reduced from 30s)
+            "PRAGMA optimize"                # Enable query planner optimizations
+        ])
+
+        # WAL-specific settings (only if WAL mode is being set)
+        if include_wal_mode:
+            optimizations.append("PRAGMA wal_autocheckpoint = 1000")  # Checkpoint after 1000 pages
+
+        for pragma in optimizations:
+            try:
+                await db.execute(pragma)
+                logger.debug(f"Applied optimization: {pragma}")
+            except Exception as e:
+                logger.warning(f"Failed to apply optimization '{pragma}': {e}")
+
+        await db.commit()
+        if include_wal_mode:
+            logger.info("Database optimizations configured for concurrent access with WAL mode")
+        else:
+            logger.debug("Connection optimizations applied")
+
     @asynccontextmanager
     async def get_connection(self) -> AsyncIterator[aiosqlite.Connection]:
         """Get a database connection from pool or create new one."""
@@ -83,11 +152,8 @@ class DatabaseManager:
                 conn = await aiosqlite.connect(self.db_path)
                 conn.row_factory = aiosqlite.Row
 
-                # Apply performance settings to new connections
-                await conn.execute("PRAGMA busy_timeout = 30000")  # 30 second timeout
-                await conn.execute("PRAGMA synchronous = NORMAL")  # Balanced durability/performance
-                await conn.execute("PRAGMA cache_size = -64000")  # 64MB cache
-                await conn.execute("PRAGMA temp_store = MEMORY")  # Use memory for temp tables
+                # Apply connection-level optimizations (WAL mode already set during initialization)
+                await self._configure_database_optimizations(conn, include_wal_mode=False)
 
         try:
             yield conn
@@ -104,18 +170,259 @@ class DatabaseManager:
                     await conn.close()
 
     async def close_pool(self) -> None:
-        """Close all connections in the pool."""
+        """Close all connections in the pool and stop monitoring."""
+        # Stop health monitoring
+        if self._health_monitor:
+            await self._health_monitor.stop_monitoring()
+
+        # Close connections
         if self._pool_lock:
             async with self._pool_lock:
                 for conn in self._connection_pool:
                     await conn.close()
                 self._connection_pool.clear()
 
+    @asynccontextmanager
+    async def get_write_connection(self) -> AsyncIterator[aiosqlite.Connection]:
+        """
+        Get a database connection with write serialization.
+
+        This ensures only one write operation occurs at a time across the entire
+        application, preventing database locking issues in multi-client scenarios.
+        """
+        if self._write_lock is None:
+            raise RuntimeError("DatabaseManager not initialized - call initialize() first")
+
+        async with self._write_lock:
+            async with self.get_connection() as conn:
+                yield conn
+
+    @asynccontextmanager
+    async def get_write_connection_with_retry(self, operation_name: str = "write_operation") -> AsyncIterator[aiosqlite.Connection]:
+        """
+        Get a database connection with write serialization and automatic retry logic.
+
+        This combines write serialization with retry handling for maximum resilience
+        against database locking issues.
+
+        Args:
+            operation_name: Name of the operation for logging and monitoring
+        """
+        if self._write_lock is None or self._retry_handler is None:
+            raise RuntimeError("DatabaseManager not initialized - call initialize() first")
+
+        async with self._retry_handler.with_retry(operation_name):
+            try:
+                async with self._write_lock:
+                    async with self.get_connection() as conn:
+                        yield conn
+
+                # Reset failure count on success
+                if self._recovery_manager:
+                    self._recovery_manager.reset_failure_count()
+
+            except Exception as e:
+                # Handle persistent failures
+                if self._recovery_manager:
+                    await self._recovery_manager.handle_persistent_failure(operation_name, e)
+                raise
+
+    def get_database_stats(self) -> Dict[str, Any]:
+        """
+        Get database performance and reliability statistics.
+
+        Returns:
+            Dictionary with retry stats, recovery stats, health status, and metrics
+        """
+        stats = {
+            "connection_pool": {
+                "configured_size": self.pool_size,
+                "current_size": len(self._connection_pool)
+            }
+        }
+
+        if self._retry_handler:
+            stats["retry_stats"] = self._retry_handler.get_retry_stats()
+
+        if self._recovery_manager:
+            stats["recovery_stats"] = self._recovery_manager.get_recovery_stats()
+
+        if self._health_monitor:
+            stats["health_status"] = self._health_monitor.get_health_status()
+
+        if self._metrics_collector:
+            stats["operation_metrics"] = self._metrics_collector.get_operation_metrics()
+            stats["locking_frequency"] = self._metrics_collector.get_locking_frequency()
+
+        return stats
+
+    async def check_health(self) -> Dict[str, Any]:
+        """
+        Perform an immediate health check and return detailed status.
+
+        Returns:
+            Dictionary with health check result and current metrics
+        """
+        if not self._health_monitor:
+            return {"error": "Health monitoring not initialized"}
+
+        # Perform immediate health check
+        health_result = await self._health_monitor.check_health()
+
+        return {
+            "health_check": {
+                "is_healthy": health_result.is_healthy,
+                "response_time_ms": health_result.response_time_ms,
+                "error_message": health_result.error_message,
+                "timestamp": health_result.timestamp.isoformat()
+            },
+            "overall_status": self._health_monitor.get_health_status(),
+            "recent_history": self._health_monitor.get_recent_history()
+        }
+
+    @asynccontextmanager
+    async def get_immediate_transaction(
+        self,
+        operation_name: str = "immediate_transaction",
+        timeout_seconds: float = 10.0
+    ) -> AsyncIterator[aiosqlite.Connection]:
+        """
+        Get a database connection with BEGIN IMMEDIATE transaction and timeout.
+
+        This ensures write locks are acquired immediately, preventing lock escalation
+        failures that can occur with DEFERRED transactions.
+
+        Args:
+            operation_name: Name of the operation for monitoring
+            timeout_seconds: Transaction timeout in seconds
+        """
+        async with self.get_write_connection_with_retry(operation_name) as conn:
+            try:
+                # Start immediate transaction with timeout
+                async with asyncio.timeout(timeout_seconds):
+                    await conn.execute("BEGIN IMMEDIATE")
+                    yield conn
+                    await conn.commit()
+            except asyncio.TimeoutError:
+                logger.warning(
+                    f"Transaction timeout after {timeout_seconds}s for {operation_name}",
+                    extra={
+                        "structured_data": {
+                            "transaction_timeout": {
+                                "operation": operation_name,
+                                "timeout_seconds": timeout_seconds
+                            }
+                        }
+                    }
+                )
+                await conn.rollback()
+                raise
+            except Exception as e:
+                logger.error(f"Transaction failed for {operation_name}: {e}")
+                await conn.rollback()
+                raise
+
+    async def execute_transaction_with_retry(
+        self,
+        operation_func,
+        operation_name: str = "transaction_operation",
+        max_retries: int = 3,
+        timeout_seconds: float = 10.0
+    ) -> Any:
+        """
+        Execute a database operation within a transaction with automatic retry.
+
+        Args:
+            operation_func: Async function that takes a connection and performs the operation
+            operation_name: Name of the operation for logging
+            max_retries: Maximum retry attempts
+            timeout_seconds: Transaction timeout in seconds
+
+        Returns:
+            Result from operation_func
+
+        Example:
+            async def my_operation(conn):
+                await conn.execute("INSERT INTO ...", (...))
+                return "success"
+
+            result = await db.execute_transaction_with_retry(my_operation, "insert_data")
+        """
+        last_error = None
+
+        for attempt in range(1, max_retries + 1):
+            try:
+                async with self.get_immediate_transaction(operation_name, timeout_seconds) as conn:
+                    result = await operation_func(conn)
+
+                # Record successful operation metrics
+                if self._metrics_collector:
+                    self._metrics_collector.record_operation(
+                        operation_name,
+                        timeout_seconds * 1000,  # Convert to ms
+                        True,
+                        len(self._connection_pool)
+                    )
+
+                return result
+
+            except (aiosqlite.OperationalError, asyncio.TimeoutError) as e:
+                last_error = e
+
+                # Record locking event for metrics
+                if self._metrics_collector and "locked" in str(e).lower():
+                    self._metrics_collector.record_locking_event(operation_name, str(e))
+
+                if attempt < max_retries:
+                    # Exponential backoff with jitter
+                    delay = 0.1 * (2 ** (attempt - 1))
+                    jitter = delay * 0.1 * (2 * random.random() - 1)  # ±10% jitter
+                    wait_time = max(0.05, delay + jitter)
+
+                    logger.warning(
+                        f"Transaction attempt {attempt} failed for {operation_name}, retrying in {wait_time:.2f}s: {e}",
+                        extra={
+                            "structured_data": {
+                                "transaction_retry": {
+                                    "operation": operation_name,
+                                    "attempt": attempt,
+                                    "delay_seconds": wait_time,
+                                    "error": str(e)
+                                }
+                            }
+                        }
+                    )
+                    await asyncio.sleep(wait_time)
+                else:
+                    # Record failed operation metrics
+                    if self._metrics_collector:
+                        self._metrics_collector.record_operation(
+                            operation_name,
+                            timeout_seconds * 1000,
+                            False,
+                            len(self._connection_pool)
+                        )
+
+                    logger.error(
+                        f"Transaction failed after {max_retries} attempts for {operation_name}: {e}",
+                        extra={
+                            "structured_data": {
+                                "transaction_failure": {
+                                    "operation": operation_name,
+                                    "max_retries": max_retries,
+                                    "final_error": str(e)
+                                }
+                            }
+                        }
+                    )
+
+        raise last_error
+
     # Project operations
 
     async def create_project(self, project: Project) -> None:
         """Create a new project record."""
-        async with self.get_connection() as db:
+        async with self.get_write_connection() as db:
             await db.execute(
                 """
                 INSERT INTO projects (id, name, remote_origin, upstream_origin, aliases, created, last_accessed)
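Two details worth noting in the block above: asyncio.timeout() requires Python 3.11 or newer, and the retry loop sleeps only between attempts, so with the default max_retries=3 there are at most two waits before the last error is re-raised. The backoff arithmetic, extracted as a standalone rendering:

    import random

    def backoff_delay(attempt: int) -> float:
        # Mirrors execute_transaction_with_retry: exponential base, ±10% jitter,
        # clamped to a 50 ms floor.
        delay = 0.1 * (2 ** (attempt - 1))
        jitter = delay * 0.1 * (2 * random.random() - 1)
        return max(0.05, delay + jitter)

    # Nominal schedule: attempt 1 -> ~0.09-0.11 s, attempt 2 -> ~0.18-0.22 s;
    # no sleep after the final attempt.
    for attempt in (1, 2):
        print(attempt, round(backoff_delay(attempt), 3))
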
@@ -302,7 +609,7 @@ class DatabaseManager:
 
     async def update_project_access_time(self, project_id: str) -> None:
         """Update the last accessed time for a project."""
-        async with self.get_connection() as db:
+        async with self.get_write_connection() as db:
             await db.execute(
                 "UPDATE projects SET last_accessed = ? WHERE id = ?",
                 (datetime.utcnow(), project_id)
@@ -311,7 +618,7 @@ class DatabaseManager:
 
     async def update_project(self, project: Project) -> None:
         """Update an existing project record."""
-        async with self.get_connection() as db:
+        async with self.get_write_connection() as db:
             await db.execute(
                 """
                 UPDATE projects
@@ -373,7 +680,7 @@ class DatabaseManager:
 
     async def create_file_description(self, file_desc: FileDescription) -> None:
         """Create or update a file description."""
-        async with self.get_connection() as db:
+        async with self.get_write_connection() as db:
             await db.execute(
                 """
                 INSERT OR REPLACE INTO file_descriptions
@@ -456,11 +763,11 @@ class DatabaseManager:
         ]
 
     async def batch_create_file_descriptions(self, file_descriptions: List[FileDescription]) -> None:
-        """Batch create multiple file descriptions efficiently."""
+        """Batch create multiple file descriptions efficiently with optimized transactions."""
         if not file_descriptions:
             return
-
-        async with self.get_connection() as db:
+
+        async def batch_operation(conn: aiosqlite.Connection) -> None:
             data = [
                 (
                     fd.project_id,
@@ -475,7 +782,7 @@ class DatabaseManager:
                 for fd in file_descriptions
             ]
 
-            await db.executemany(
+            await conn.executemany(
                 """
                 INSERT OR REPLACE INTO file_descriptions
                 (project_id, branch, file_path, description, file_hash, last_modified, version, source_project_id)
@@ -483,8 +790,13 @@ class DatabaseManager:
                 """,
                 data
             )
-            await db.commit()
             logger.debug(f"Batch created {len(file_descriptions)} file descriptions")
+
+        await self.execute_transaction_with_retry(
+            batch_operation,
+            f"batch_create_file_descriptions_{len(file_descriptions)}_files",
+            timeout_seconds=30.0  # Longer timeout for batch operations
+        )
 
     # Search operations
 
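The public signature of batch_create_file_descriptions is unchanged; only the transaction strategy differs (one BEGIN IMMEDIATE transaction with retry instead of a plain connection plus explicit commit). A call-site sketch; the field names are taken from the INSERT column list above, while keyword construction of FileDescription and the illustrative values are assumptions:

    from datetime import datetime

    descriptions = [
        FileDescription(
            project_id="proj-123",          # illustrative values
            branch="main",
            file_path="src/app.py",
            description="Application entry point.",
            file_hash=None,
            last_modified=datetime.utcnow(),
            version=1,
            source_project_id=None,
        ),
    ]
    await db.batch_create_file_descriptions(descriptions)
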
@@ -552,7 +864,7 @@ class DatabaseManager:
         """Cache token count with TTL."""
         expires = datetime.utcnow() + timedelta(hours=ttl_hours)
 
-        async with self.get_connection() as db:
+        async with self.get_write_connection() as db:
             await db.execute(
                 """
                 INSERT OR REPLACE INTO token_cache (cache_key, token_count, expires)
@@ -564,7 +876,7 @@ class DatabaseManager:
 
     async def cleanup_expired_cache(self) -> None:
         """Remove expired cache entries."""
-        async with self.get_connection() as db:
+        async with self.get_write_connection() as db:
             await db.execute(
                 "DELETE FROM token_cache WHERE expires < ?",
                 (datetime.utcnow(),)
@@ -663,7 +975,7 @@ class DatabaseManager:
 
     async def create_project_overview(self, overview: ProjectOverview) -> None:
         """Create or update a project overview."""
-        async with self.get_connection() as db:
+        async with self.get_write_connection() as db:
             await db.execute(
                 """
                 INSERT OR REPLACE INTO project_overviews
@@ -716,9 +1028,9 @@ class DatabaseManager:
         """
         removed_files = []
 
-        async with self.get_connection() as db:
+        async def cleanup_operation(conn: aiosqlite.Connection) -> List[str]:
             # Get all file descriptions for this project/branch
-            cursor = await db.execute(
+            cursor = await conn.execute(
                 "SELECT file_path FROM file_descriptions WHERE project_id = ? AND branch = ?",
                 (project_id, branch)
             )
@@ -733,16 +1045,22 @@ class DatabaseManager:
 
                 if not full_path.exists():
                     to_remove.append(file_path)
-                    removed_files.append(file_path)
 
             # Remove descriptions for missing files
             if to_remove:
-                await db.executemany(
+                await conn.executemany(
                     "DELETE FROM file_descriptions WHERE project_id = ? AND branch = ? AND file_path = ?",
                     [(project_id, branch, path) for path in to_remove]
                 )
-                await db.commit()
                 logger.info(f"Cleaned up {len(to_remove)} missing files from {project_id}/{branch}")
+
+            return to_remove
+
+        removed_files = await self.execute_transaction_with_retry(
+            cleanup_operation,
+            f"cleanup_missing_files_{project_id}_{branch}",
+            timeout_seconds=60.0  # Longer timeout for file system operations
+        )
 
         return removed_files
 
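Because execute_transaction_with_retry returns whatever the wrapped coroutine returns, cleanup_operation can hand the list of deleted paths straight back to the caller. The same pattern suits any write helper that needs a result; a small hedged sketch against the token_cache table from the earlier hunks:

    async def purge_expired(conn) -> int:
        # The returned value is passed through execute_transaction_with_retry.
        cursor = await conn.execute(
            "DELETE FROM token_cache WHERE expires < datetime('now')"
        )
        return cursor.rowcount

    deleted = await db.execute_transaction_with_retry(purge_expired, "purge_expired_cache")
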
@@ -820,7 +1138,7 @@ class DatabaseManager:
         Returns:
             Number of projects removed
         """
-        async with self.get_connection() as db:
+        async with self.get_write_connection() as db:
             # Find projects with no descriptions and no overview
             cursor = await db.execute("""
                 SELECT p.id, p.name