mcp-code-indexer 1.9.0-py3-none-any.whl → 2.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_code_indexer/database/connection_health.py +441 -0
- mcp_code_indexer/database/database.py +344 -26
- mcp_code_indexer/database/retry_handler.py +344 -0
- mcp_code_indexer/git_hook_handler.py +1 -1
- mcp_code_indexer/logging_config.py +29 -0
- mcp_code_indexer/middleware/error_middleware.py +41 -0
- mcp_code_indexer/server/mcp_server.py +49 -2
- {mcp_code_indexer-1.9.0.dist-info → mcp_code_indexer-2.0.0.dist-info}/METADATA +68 -15
- {mcp_code_indexer-1.9.0.dist-info → mcp_code_indexer-2.0.0.dist-info}/RECORD +13 -11
- {mcp_code_indexer-1.9.0.dist-info → mcp_code_indexer-2.0.0.dist-info}/WHEEL +0 -0
- {mcp_code_indexer-1.9.0.dist-info → mcp_code_indexer-2.0.0.dist-info}/entry_points.txt +0 -0
- {mcp_code_indexer-1.9.0.dist-info → mcp_code_indexer-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {mcp_code_indexer-1.9.0.dist-info → mcp_code_indexer-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,441 @@
+"""
+Database connection health monitoring and metrics collection.
+
+This module provides proactive monitoring of database connections with automatic
+pool refresh capabilities and performance metrics tracking.
+"""
+
+import asyncio
+import logging
+import time
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+from typing import Dict, Optional, List
+
+import aiosqlite
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class HealthCheckResult:
+    """Result of a database health check."""
+    is_healthy: bool
+    response_time_ms: float
+    error_message: Optional[str] = None
+    timestamp: datetime = field(default_factory=datetime.utcnow)
+
+
+@dataclass
+class ConnectionMetrics:
+    """Metrics for database connection monitoring."""
+    total_checks: int = 0
+    successful_checks: int = 0
+    failed_checks: int = 0
+    consecutive_failures: int = 0
+    avg_response_time_ms: float = 0.0
+    last_check_time: Optional[datetime] = None
+    last_success_time: Optional[datetime] = None
+    last_failure_time: Optional[datetime] = None
+    pool_refreshes: int = 0
+
+
+class ConnectionHealthMonitor:
+    """Monitors database connection health with periodic checks and metrics."""
+
+    def __init__(
+        self,
+        database_manager,
+        check_interval: float = 30.0,
+        failure_threshold: int = 3,
+        timeout_seconds: float = 5.0
+    ):
+        """
+        Initialize connection health monitor.
+
+        Args:
+            database_manager: DatabaseManager instance to monitor
+            check_interval: Health check interval in seconds
+            failure_threshold: Consecutive failures before pool refresh
+            timeout_seconds: Timeout for health check queries
+        """
+        self.database_manager = database_manager
+        self.check_interval = check_interval
+        self.failure_threshold = failure_threshold
+        self.timeout_seconds = timeout_seconds
+
+        self.metrics = ConnectionMetrics()
+        self._monitoring_task: Optional[asyncio.Task] = None
+        self._is_monitoring = False
+        self._health_history: List[HealthCheckResult] = []
+        self._max_history_size = 100
+
+    async def start_monitoring(self) -> None:
+        """Start periodic health monitoring."""
+        if self._is_monitoring:
+            logger.warning("Health monitoring is already running")
+            return
+
+        self._is_monitoring = True
+        self._monitoring_task = asyncio.create_task(self._monitoring_loop())
+        logger.info(
+            f"Started database health monitoring with {self.check_interval}s interval",
+            extra={
+                "structured_data": {
+                    "health_monitoring": {
+                        "action": "started",
+                        "check_interval": self.check_interval,
+                        "failure_threshold": self.failure_threshold
+                    }
+                }
+            }
+        )
+
+    async def stop_monitoring(self) -> None:
+        """Stop periodic health monitoring."""
+        if not self._is_monitoring:
+            return
+
+        self._is_monitoring = False
+        if self._monitoring_task:
+            self._monitoring_task.cancel()
+            try:
+                await self._monitoring_task
+            except asyncio.CancelledError:
+                pass
+            self._monitoring_task = None
+
+        logger.info("Stopped database health monitoring")
+
+    async def _monitoring_loop(self) -> None:
+        """Main monitoring loop that runs periodic health checks."""
+        while self._is_monitoring:
+            try:
+                # Perform health check
+                health_result = await self.check_health()
+
+                # Update metrics
+                self._update_metrics(health_result)
+
+                # Store in history
+                self._add_to_history(health_result)
+
+                # Check if pool refresh is needed
+                if self.metrics.consecutive_failures >= self.failure_threshold:
+                    await self._handle_persistent_failures()
+
+                # Log periodic health status
+                if self.metrics.total_checks % 10 == 0:  # Every 10 checks
+                    self._log_health_summary()
+
+            except Exception as e:
+                logger.error(f"Error in health monitoring loop: {e}")
+
+            # Wait for next check
+            await asyncio.sleep(self.check_interval)
+
+    async def check_health(self) -> HealthCheckResult:
+        """
+        Perform a single health check on the database.
+
+        Returns:
+            HealthCheckResult with check status and timing
+        """
+        start_time = time.time()
+
+        try:
+            # Use a timeout for the health check
+            async with asyncio.timeout(self.timeout_seconds):
+                async with self.database_manager.get_connection() as conn:
+                    # Simple query to test connectivity
+                    cursor = await conn.execute("SELECT 1")
+                    result = await cursor.fetchone()
+
+                    if result and result[0] == 1:
+                        response_time = (time.time() - start_time) * 1000
+                        return HealthCheckResult(
+                            is_healthy=True,
+                            response_time_ms=response_time
+                        )
+                    else:
+                        return HealthCheckResult(
+                            is_healthy=False,
+                            response_time_ms=(time.time() - start_time) * 1000,
+                            error_message="Unexpected query result"
+                        )
+
+        except asyncio.TimeoutError:
+            return HealthCheckResult(
+                is_healthy=False,
+                response_time_ms=(time.time() - start_time) * 1000,
+                error_message=f"Health check timeout after {self.timeout_seconds}s"
+            )
+
+        except Exception as e:
+            return HealthCheckResult(
+                is_healthy=False,
+                response_time_ms=(time.time() - start_time) * 1000,
+                error_message=str(e)
+            )
+
+    def _update_metrics(self, health_result: HealthCheckResult) -> None:
+        """Update connection metrics based on health check result."""
+        self.metrics.total_checks += 1
+        self.metrics.last_check_time = health_result.timestamp
+
+        if health_result.is_healthy:
+            self.metrics.successful_checks += 1
+            self.metrics.consecutive_failures = 0
+            self.metrics.last_success_time = health_result.timestamp
+        else:
+            self.metrics.failed_checks += 1
+            self.metrics.consecutive_failures += 1
+            self.metrics.last_failure_time = health_result.timestamp
+
+        # Update average response time
+        if self.metrics.total_checks > 0:
+            current_avg = self.metrics.avg_response_time_ms
+            new_avg = (
+                (current_avg * (self.metrics.total_checks - 1) + health_result.response_time_ms)
+                / self.metrics.total_checks
+            )
+            self.metrics.avg_response_time_ms = new_avg
+
+    def _add_to_history(self, health_result: HealthCheckResult) -> None:
+        """Add health check result to history, maintaining size limit."""
+        self._health_history.append(health_result)
+
+        # Trim history if it exceeds max size
+        if len(self._health_history) > self._max_history_size:
+            self._health_history = self._health_history[-self._max_history_size:]
+
+    async def _handle_persistent_failures(self) -> None:
+        """Handle persistent health check failures by refreshing pool."""
+        logger.warning(
+            f"Detected {self.metrics.consecutive_failures} consecutive failures, refreshing connection pool",
+            extra={
+                "structured_data": {
+                    "pool_refresh": {
+                        "consecutive_failures": self.metrics.consecutive_failures,
+                        "failure_threshold": self.failure_threshold,
+                        "action": "pool_refresh_triggered"
+                    }
+                }
+            }
+        )
+
+        try:
+            # Refresh the connection pool
+            await self.database_manager.close_pool()
+            self.metrics.pool_refreshes += 1
+            self.metrics.consecutive_failures = 0
+
+            # Perform immediate health check after refresh
+            health_result = await self.check_health()
+            if health_result.is_healthy:
+                logger.info("Connection pool refresh successful, health check passed")
+            else:
+                logger.error(f"Connection pool refresh failed, health check error: {health_result.error_message}")
+
+        except Exception as e:
+            logger.error(f"Failed to refresh connection pool: {e}")
+
+    def _log_health_summary(self) -> None:
+        """Log a summary of health monitoring statistics."""
+        success_rate = (
+            (self.metrics.successful_checks / self.metrics.total_checks * 100)
+            if self.metrics.total_checks > 0 else 0
+        )
+
+        logger.info(
+            f"Health monitoring summary: {success_rate:.1f}% success rate over {self.metrics.total_checks} checks",
+            extra={
+                "structured_data": {
+                    "health_summary": {
+                        "total_checks": self.metrics.total_checks,
+                        "success_rate_percent": success_rate,
+                        "avg_response_time_ms": self.metrics.avg_response_time_ms,
+                        "consecutive_failures": self.metrics.consecutive_failures,
+                        "pool_refreshes": self.metrics.pool_refreshes
+                    }
+                }
+            }
+        )
+
+    def get_health_status(self) -> Dict:
+        """
+        Get current health status and metrics.
+
+        Returns:
+            Dictionary with health status, metrics, and recent history
+        """
+        # Get recent health status (last 5 checks)
+        recent_checks = self._health_history[-5:] if self._health_history else []
+        recent_success_rate = (
+            sum(1 for check in recent_checks if check.is_healthy) / len(recent_checks) * 100
+            if recent_checks else 0
+        )
+
+        return {
+            "is_monitoring": self._is_monitoring,
+            "current_status": {
+                "is_healthy": (
+                    recent_checks[-1].is_healthy if recent_checks else True
+                ),
+                "consecutive_failures": self.metrics.consecutive_failures,
+                "recent_success_rate_percent": recent_success_rate
+            },
+            "metrics": {
+                "total_checks": self.metrics.total_checks,
+                "successful_checks": self.metrics.successful_checks,
+                "failed_checks": self.metrics.failed_checks,
+                "avg_response_time_ms": self.metrics.avg_response_time_ms,
+                "pool_refreshes": self.metrics.pool_refreshes,
+                "last_check_time": self.metrics.last_check_time.isoformat() if self.metrics.last_check_time else None,
+                "last_success_time": self.metrics.last_success_time.isoformat() if self.metrics.last_success_time else None,
+                "last_failure_time": self.metrics.last_failure_time.isoformat() if self.metrics.last_failure_time else None
+            },
+            "configuration": {
+                "check_interval": self.check_interval,
+                "failure_threshold": self.failure_threshold,
+                "timeout_seconds": self.timeout_seconds
+            }
+        }
+
+    def get_recent_history(self, count: int = 10) -> List[Dict]:
+        """
+        Get recent health check history.
+
+        Args:
+            count: Number of recent checks to return
+
+        Returns:
+            List of health check results as dictionaries
+        """
+        recent_checks = self._health_history[-count:] if self._health_history else []
+        return [
+            {
+                "timestamp": check.timestamp.isoformat(),
+                "is_healthy": check.is_healthy,
+                "response_time_ms": check.response_time_ms,
+                "error_message": check.error_message
+            }
+            for check in recent_checks
+        ]
+
+
+class DatabaseMetricsCollector:
+    """Collects and aggregates database performance metrics."""
+
+    def __init__(self):
+        """Initialize metrics collector."""
+        self._operation_metrics = {}
+        self._locking_events = []
+        self._max_events_history = 50
+
+    def record_operation(
+        self,
+        operation_name: str,
+        duration_ms: float,
+        success: bool,
+        connection_pool_size: int
+    ) -> None:
+        """
+        Record a database operation for metrics.
+
+        Args:
+            operation_name: Name of the database operation
+            duration_ms: Operation duration in milliseconds
+            success: Whether the operation succeeded
+            connection_pool_size: Current connection pool size
+        """
+        if operation_name not in self._operation_metrics:
+            self._operation_metrics[operation_name] = {
+                "total_operations": 0,
+                "successful_operations": 0,
+                "failed_operations": 0,
+                "total_duration_ms": 0.0,
+                "avg_duration_ms": 0.0,
+                "min_duration_ms": float('inf'),
+                "max_duration_ms": 0.0
+            }
+
+        metrics = self._operation_metrics[operation_name]
+        metrics["total_operations"] += 1
+        metrics["total_duration_ms"] += duration_ms
+
+        if success:
+            metrics["successful_operations"] += 1
+        else:
+            metrics["failed_operations"] += 1
+
+        # Update duration statistics
+        metrics["avg_duration_ms"] = metrics["total_duration_ms"] / metrics["total_operations"]
+        metrics["min_duration_ms"] = min(metrics["min_duration_ms"], duration_ms)
+        metrics["max_duration_ms"] = max(metrics["max_duration_ms"], duration_ms)
+
+    def record_locking_event(self, operation_name: str, error_message: str) -> None:
+        """
+        Record a database locking event.
+
+        Args:
+            operation_name: Name of the operation that encountered locking
+            error_message: Error message from the locking event
+        """
+        event = {
+            "timestamp": datetime.utcnow().isoformat(),
+            "operation_name": operation_name,
+            "error_message": error_message
+        }
+
+        self._locking_events.append(event)
+
+        # Trim history
+        if len(self._locking_events) > self._max_events_history:
+            self._locking_events = self._locking_events[-self._max_events_history:]
+
+    def get_operation_metrics(self) -> Dict:
+        """Get aggregated operation metrics."""
+        return {
+            operation: metrics.copy()
+            for operation, metrics in self._operation_metrics.items()
+        }
+
+    def get_locking_frequency(self) -> Dict:
+        """Get locking event frequency statistics."""
+        if not self._locking_events:
+            return {
+                "total_events": 0,
+                "events_last_hour": 0,
+                "most_frequent_operations": []
+            }
+
+        # Count events in last hour
+        one_hour_ago = datetime.utcnow() - timedelta(hours=1)
+        recent_events = [
+            event for event in self._locking_events
+            if datetime.fromisoformat(event["timestamp"]) > one_hour_ago
+        ]
+
+        # Count by operation
+        operation_counts = {}
+        for event in self._locking_events:
+            op = event["operation_name"]
+            operation_counts[op] = operation_counts.get(op, 0) + 1
+
+        # Sort by frequency
+        most_frequent = sorted(
+            operation_counts.items(),
+            key=lambda x: x[1],
+            reverse=True
+        )[:5]
+
+        return {
+            "total_events": len(self._locking_events),
+            "events_last_hour": len(recent_events),
+            "most_frequent_operations": [
+                {"operation": op, "count": count}
+                for op, count in most_frequent
+            ],
+            "recent_events": self._locking_events[-10:]  # Last 10 events
+        }