mcp-code-indexer 3.1.4__py3-none-any.whl → 3.1.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- mcp_code_indexer/__init__.py +8 -6
- mcp_code_indexer/ask_handler.py +105 -75
- mcp_code_indexer/claude_api_handler.py +125 -82
- mcp_code_indexer/cleanup_manager.py +107 -81
- mcp_code_indexer/database/connection_health.py +230 -161
- mcp_code_indexer/database/database.py +529 -415
- mcp_code_indexer/database/exceptions.py +167 -118
- mcp_code_indexer/database/models.py +54 -19
- mcp_code_indexer/database/retry_executor.py +139 -103
- mcp_code_indexer/deepask_handler.py +178 -140
- mcp_code_indexer/error_handler.py +88 -76
- mcp_code_indexer/file_scanner.py +163 -141
- mcp_code_indexer/git_hook_handler.py +352 -261
- mcp_code_indexer/logging_config.py +76 -94
- mcp_code_indexer/main.py +406 -320
- mcp_code_indexer/middleware/error_middleware.py +106 -71
- mcp_code_indexer/query_preprocessor.py +40 -40
- mcp_code_indexer/server/mcp_server.py +785 -470
- mcp_code_indexer/token_counter.py +54 -47
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.6.dist-info}/METADATA +3 -3
- mcp_code_indexer-3.1.6.dist-info/RECORD +37 -0
- mcp_code_indexer-3.1.4.dist-info/RECORD +0 -37
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.6.dist-info}/WHEEL +0 -0
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.6.dist-info}/entry_points.txt +0 -0
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.6.dist-info}/licenses/LICENSE +0 -0
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.6.dist-info}/top_level.txt +0 -0
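The hunks reproduced below are the per-line changes for `mcp_code_indexer/database/connection_health.py`, one of the changed modules listed above. A comparable file-level diff can be generated locally from the two wheels, since a wheel is just a zip archive. The sketch below is a minimal illustration, not the tool that produced this page: it assumes both wheel files have already been downloaded (for example with `pip download mcp-code-indexer==3.1.4 --no-deps`), and the `read_member` helper is defined here for the example only.

```python
# Minimal sketch: diff a single module across the two published wheels.
# Wheels are zip archives, so the standard library is enough.
# File names and the read_member helper are illustrative, not part of the package.
import difflib
import zipfile

OLD_WHEEL = "mcp_code_indexer-3.1.4-py3-none-any.whl"
NEW_WHEEL = "mcp_code_indexer-3.1.6-py3-none-any.whl"
MEMBER = "mcp_code_indexer/database/connection_health.py"


def read_member(wheel_path: str, member: str) -> list[str]:
    """Return one archived file's text as a list of lines (newlines kept)."""
    with zipfile.ZipFile(wheel_path) as wheel:
        return wheel.read(member).decode("utf-8").splitlines(keepends=True)


diff_lines = difflib.unified_diff(
    read_member(OLD_WHEEL, MEMBER),
    read_member(NEW_WHEEL, MEMBER),
    fromfile=f"3.1.4/{MEMBER}",
    tofile=f"3.1.6/{MEMBER}",
)
print("".join(diff_lines))
```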
@@ -12,14 +12,13 @@ from dataclasses import dataclass, field
 from datetime import datetime, timedelta
 from typing import Dict, Optional, List

-import aiosqlite
-
 logger = logging.getLogger(__name__)


 @dataclass
 class HealthCheckResult:
     """Result of a database health check."""
+
     is_healthy: bool
     response_time_ms: float
     error_message: Optional[str] = None
@@ -29,6 +28,7 @@ class HealthCheckResult:
 @dataclass
 class ConnectionMetrics:
     """Metrics for database connection monitoring."""
+
     total_checks: int = 0
     successful_checks: int = 0
     failed_checks: int = 0
@@ -42,17 +42,17 @@ class ConnectionMetrics:

 class ConnectionHealthMonitor:
     """Monitors database connection health with periodic checks and metrics."""
-
+
     def __init__(
         self,
         database_manager,
         check_interval: float = 30.0,
         failure_threshold: int = 3,
-        timeout_seconds: float = 5.0
+        timeout_seconds: float = 5.0,
     ):
         """
         Initialize connection health monitor.
-
+
         Args:
             database_manager: DatabaseManager instance to monitor
             check_interval: Health check interval in seconds
@@ -63,39 +63,42 @@ class ConnectionHealthMonitor:
         self.check_interval = check_interval
         self.failure_threshold = failure_threshold
         self.timeout_seconds = timeout_seconds
-
+
         self.metrics = ConnectionMetrics()
         self._monitoring_task: Optional[asyncio.Task] = None
         self._is_monitoring = False
         self._health_history: List[HealthCheckResult] = []
         self._max_history_size = 100
-
+
     async def start_monitoring(self) -> None:
         """Start periodic health monitoring."""
         if self._is_monitoring:
             logger.warning("Health monitoring is already running")
             return
-
+
         self._is_monitoring = True
         self._monitoring_task = asyncio.create_task(self._monitoring_loop())
         logger.info(
-
+            (
+                f"Started database health monitoring with "
+                f"{self.check_interval}s interval"
+            ),
             extra={
                 "structured_data": {
                     "health_monitoring": {
                         "action": "started",
                         "check_interval": self.check_interval,
-                        "failure_threshold": self.failure_threshold
+                        "failure_threshold": self.failure_threshold,
                     }
                 }
-            }
+            },
         )
-
+
     async def stop_monitoring(self) -> None:
         """Stop periodic health monitoring."""
         if not self._is_monitoring:
             return
-
+
         self._is_monitoring = False
         if self._monitoring_task:
             self._monitoring_task.cancel()
@@ -104,45 +107,45 @@ class ConnectionHealthMonitor:
             except asyncio.CancelledError:
                 pass
             self._monitoring_task = None
-
+
         logger.info("Stopped database health monitoring")
-
+
     async def _monitoring_loop(self) -> None:
         """Main monitoring loop that runs periodic health checks."""
         while self._is_monitoring:
             try:
                 # Perform health check
                 health_result = await self.check_health()
-
+
                 # Update metrics
                 self._update_metrics(health_result)
-
+
                 # Store in history
                 self._add_to_history(health_result)
-
+
                 # Check if pool refresh is needed
                 if self.metrics.consecutive_failures >= self.failure_threshold:
                     await self._handle_persistent_failures()
-
+
                 # Log periodic health status
                 if self.metrics.total_checks % 10 == 0:  # Every 10 checks
                     self._log_health_summary()
-
+
             except Exception as e:
                 logger.error(f"Error in health monitoring loop: {e}")
-
+
             # Wait for next check
             await asyncio.sleep(self.check_interval)
-
+
     async def check_health(self) -> HealthCheckResult:
         """
         Perform a single health check on the database.
-
+
         Returns:
             HealthCheckResult with check status and timing
         """
         start_time = time.time()
-
+
         try:
             # Use a timeout for the health check
             async with asyncio.timeout(self.timeout_seconds):
@@ -150,39 +153,38 @@ class ConnectionHealthMonitor:
                     # Simple query to test connectivity
                     cursor = await conn.execute("SELECT 1")
                     result = await cursor.fetchone()
-
+
                     if result and result[0] == 1:
                         response_time = (time.time() - start_time) * 1000
                         return HealthCheckResult(
-                            is_healthy=True,
-                            response_time_ms=response_time
+                            is_healthy=True, response_time_ms=response_time
                         )
                     else:
                         return HealthCheckResult(
                             is_healthy=False,
                             response_time_ms=(time.time() - start_time) * 1000,
-                            error_message="Unexpected query result"
+                            error_message="Unexpected query result",
                         )
-
+
         except asyncio.TimeoutError:
             return HealthCheckResult(
                 is_healthy=False,
                 response_time_ms=(time.time() - start_time) * 1000,
-                error_message=f"Health check timeout after {self.timeout_seconds}s"
+                error_message=(f"Health check timeout after {self.timeout_seconds}s"),
             )
-
+
         except Exception as e:
             return HealthCheckResult(
                 is_healthy=False,
                 response_time_ms=(time.time() - start_time) * 1000,
-                error_message=str(e)
+                error_message=str(e),
             )
-
+
     def _update_metrics(self, health_result: HealthCheckResult) -> None:
         """Update connection metrics based on health check result."""
         self.metrics.total_checks += 1
         self.metrics.last_check_time = health_result.timestamp
-
+
         if health_result.is_healthy:
             self.metrics.successful_checks += 1
             self.metrics.consecutive_failures = 0
@@ -191,64 +193,74 @@ class ConnectionHealthMonitor:
             self.metrics.failed_checks += 1
             self.metrics.consecutive_failures += 1
             self.metrics.last_failure_time = health_result.timestamp
-
+
         # Update average response time
         if self.metrics.total_checks > 0:
             current_avg = self.metrics.avg_response_time_ms
             new_avg = (
-
-
-            )
+                current_avg * (self.metrics.total_checks - 1)
+                + health_result.response_time_ms
+            ) / self.metrics.total_checks
             self.metrics.avg_response_time_ms = new_avg
-
+
     def _add_to_history(self, health_result: HealthCheckResult) -> None:
         """Add health check result to history, maintaining size limit."""
         self._health_history.append(health_result)
-
+
         # Trim history if it exceeds max size
         if len(self._health_history) > self._max_history_size:
-            self._health_history = self._health_history[-self._max_history_size:]
-
+            self._health_history = self._health_history[-self._max_history_size :]
+
     async def _handle_persistent_failures(self) -> None:
         """Handle persistent health check failures by refreshing pool."""
         logger.warning(
-
+            (
+                f"Detected {self.metrics.consecutive_failures} consecutive "
+                f"failures, refreshing connection pool"
+            ),
             extra={
                 "structured_data": {
                     "pool_refresh": {
                         "consecutive_failures": self.metrics.consecutive_failures,
                         "failure_threshold": self.failure_threshold,
-                        "action": "pool_refresh_triggered"
+                        "action": "pool_refresh_triggered",
                     }
                 }
-            }
+            },
         )
-
+
         try:
             # Refresh the connection pool
             await self.database_manager.close_pool()
             self.metrics.pool_refreshes += 1
             self.metrics.consecutive_failures = 0
-
+
             # Perform immediate health check after refresh
             health_result = await self.check_health()
             if health_result.is_healthy:
                 logger.info("Connection pool refresh successful, health check passed")
             else:
-                logger.error(
-
+                logger.error(
+                    f"Connection pool refresh failed, health check error: "
+                    f"{health_result.error_message}"
+                )
+
         except Exception as e:
             logger.error(f"Failed to refresh connection pool: {e}")
-
+
     def _log_health_summary(self) -> None:
         """Log a summary of health monitoring statistics."""
         success_rate = (
             (self.metrics.successful_checks / self.metrics.total_checks * 100)
-            if self.metrics.total_checks > 0
+            if self.metrics.total_checks > 0
+            else 0
         )
-
+
         logger.info(
-
+            (
+                f"Health monitoring summary: {success_rate:.1f}% success rate "
+                f"over {self.metrics.total_checks} checks"
+            ),
            extra={
                "structured_data": {
                    "health_summary": {
@@ -256,37 +268,38 @@ class ConnectionHealthMonitor:
                         "success_rate_percent": success_rate,
                         "avg_response_time_ms": self.metrics.avg_response_time_ms,
                         "consecutive_failures": self.metrics.consecutive_failures,
-                        "pool_refreshes": self.metrics.pool_refreshes
+                        "pool_refreshes": self.metrics.pool_refreshes,
                     }
                 }
-            }
+            },
         )
-
+
     def get_health_status(self, include_retry_stats: bool = True) -> Dict:
         """
         Get current health status and metrics.
-
+
         Args:
             include_retry_stats: Whether to include retry executor statistics
-
+
         Returns:
             Dictionary with health status, metrics, recent history, and retry stats
         """
         # Get recent health status (last 5 checks)
         recent_checks = self._health_history[-5:] if self._health_history else []
         recent_success_rate = (
-            sum(1 for check in recent_checks if check.is_healthy)
-
+            sum(1 for check in recent_checks if check.is_healthy)
+            / len(recent_checks)
+            * 100
+            if recent_checks
+            else 0
         )
-
+
         health_status = {
             "is_monitoring": self._is_monitoring,
             "current_status": {
-                "is_healthy": (
-                    recent_checks[-1].is_healthy if recent_checks else True
-                ),
+                "is_healthy": (recent_checks[-1].is_healthy if recent_checks else True),
                 "consecutive_failures": self.metrics.consecutive_failures,
-                "recent_success_rate_percent": recent_success_rate
+                "recent_success_rate_percent": recent_success_rate,
             },
             "metrics": {
                 "total_checks": self.metrics.total_checks,
@@ -294,40 +307,52 @@ class ConnectionHealthMonitor:
                 "failed_checks": self.metrics.failed_checks,
                 "avg_response_time_ms": self.metrics.avg_response_time_ms,
                 "pool_refreshes": self.metrics.pool_refreshes,
-                "last_check_time":
-
-
+                "last_check_time": (
+                    self.metrics.last_check_time.isoformat()
+                    if self.metrics.last_check_time
+                    else None
+                ),
+                "last_success_time": (
+                    self.metrics.last_success_time.isoformat()
+                    if self.metrics.last_success_time
+                    else None
+                ),
+                "last_failure_time": (
+                    self.metrics.last_failure_time.isoformat()
+                    if self.metrics.last_failure_time
+                    else None
+                ),
             },
             "configuration": {
                 "check_interval": self.check_interval,
                 "failure_threshold": self.failure_threshold,
-                "timeout_seconds": self.timeout_seconds
-            }
+                "timeout_seconds": self.timeout_seconds,
+            },
         }
-
+
         # Include retry executor statistics if available
-        if include_retry_stats and hasattr(self.database_manager,
+        if include_retry_stats and hasattr(self.database_manager, "_retry_executor"):
             retry_executor = self.database_manager._retry_executor
             if retry_executor:
                 health_status["retry_statistics"] = retry_executor.get_retry_stats()
-
+
         # Include database-level statistics if available
-        if hasattr(self.database_manager,
+        if hasattr(self.database_manager, "get_database_stats"):
             try:
                 db_stats = self.database_manager.get_database_stats()
                 health_status["database_statistics"] = db_stats
             except Exception as e:
                 logger.warning(f"Failed to get database statistics: {e}")
-
+
         return health_status
-
+
     def get_recent_history(self, count: int = 10) -> List[Dict]:
         """
         Get recent health check history.
-
+
         Args:
             count: Number of recent checks to return
-
+
         Returns:
             List of health check results as dictionaries
         """
@@ -337,70 +362,78 @@ class ConnectionHealthMonitor:
                 "timestamp": check.timestamp.isoformat(),
                 "is_healthy": check.is_healthy,
                 "response_time_ms": check.response_time_ms,
-                "error_message":
+                "error_message": (
+                    check.error_message[:500] + "..."
+                    if check.error_message and len(check.error_message) > 500
+                    else check.error_message
+                ),
             }
             for check in recent_checks
         ]
-
+
     def get_comprehensive_diagnostics(self) -> Dict:
         """
         Get comprehensive database health diagnostics for monitoring.
-
-        This method provides detailed diagnostics suitable for the
+
+        This method provides detailed diagnostics suitable for the
         check_database_health MCP tool.
-
+
         Returns:
-            Comprehensive health diagnostics including retry metrics,
+            Comprehensive health diagnostics including retry metrics,
             performance data, and resilience statistics
         """
         # Get base health status with retry stats
         base_status = self.get_health_status(include_retry_stats=True)
-
+
         # Add detailed performance analysis
         diagnostics = {
             **base_status,
             "performance_analysis": {
                 "health_check_performance": {
                     "avg_response_time_ms": self.metrics.avg_response_time_ms,
-                    "response_time_threshold_exceeded":
-
+                    "response_time_threshold_exceeded": (
+                        self.metrics.avg_response_time_ms > 100
+                    ),
+                    "recent_performance_trend": self._get_performance_trend(),
                 },
                 "failure_analysis": {
                     "failure_rate_percent": (
                         (self.metrics.failed_checks / self.metrics.total_checks * 100)
-                        if self.metrics.total_checks > 0
+                        if self.metrics.total_checks > 0
+                        else 0
                     ),
                     "consecutive_failures": self.metrics.consecutive_failures,
                     "approaching_failure_threshold": (
                         self.metrics.consecutive_failures >= self.failure_threshold - 1
                     ),
-                    "pool_refresh_frequency": self.metrics.pool_refreshes
-                }
+                    "pool_refresh_frequency": self.metrics.pool_refreshes,
+                },
             },
             "resilience_indicators": {
                 "overall_health_score": self._calculate_health_score(),
                 "retry_effectiveness": self._analyze_retry_effectiveness(),
                 "connection_stability": self._assess_connection_stability(),
-                "recommendations": self._generate_health_recommendations()
+                "recommendations": self._generate_health_recommendations(),
             },
-            "recent_history": self.get_recent_history(count=5)
+            "recent_history": self.get_recent_history(count=5),
         }
-
+
         return diagnostics
-
+
     def _get_performance_trend(self) -> str:
         """Analyze recent performance trend."""
         if len(self._health_history) < 5:
             return "insufficient_data"
-
+
         recent_times = [
-            check.response_time_ms
+            check.response_time_ms
+            for check in self._health_history[-5:]
             if check.is_healthy
         ]
-
+
         if len(recent_times) < 2:
             return "insufficient_healthy_checks"
-
+
         # Simple trend analysis
         if recent_times[-1] > recent_times[0] * 1.5:
             return "degrading"
@@ -408,125 +441,147 @@ class ConnectionHealthMonitor:
             return "improving"
         else:
             return "stable"
-
+
     def _calculate_health_score(self) -> float:
         """Calculate overall health score (0-100)."""
         if self.metrics.total_checks == 0:
             return 100.0
-
+
         # Base score from success rate
-        success_rate = (
-
+        success_rate = (
+            self.metrics.successful_checks / self.metrics.total_checks
+        ) * 100
+
         # Penalize consecutive failures
         failure_penalty = min(self.metrics.consecutive_failures * 10, 50)
-
+
         # Penalize high response times
         response_penalty = min(max(0, self.metrics.avg_response_time_ms - 50) / 10, 20)
-
+
         # Calculate final score
         score = success_rate - failure_penalty - response_penalty
         return max(0.0, min(100.0, score))
-
+
     def _analyze_retry_effectiveness(self) -> Dict:
         """Analyze retry mechanism effectiveness."""
-        if not hasattr(self.database_manager,
+        if not hasattr(self.database_manager, "_retry_executor"):
             return {"status": "no_retry_executor"}
-
+
         retry_executor = self.database_manager._retry_executor
         if not retry_executor:
             return {"status": "retry_executor_not_initialized"}
-
+
         retry_stats = retry_executor.get_retry_stats()
-
+
         return {
             "status": "active",
             "effectiveness_score": retry_stats.get("success_rate_percent", 0),
             "retry_frequency": retry_stats.get("retry_rate_percent", 0),
-            "avg_attempts_per_operation": retry_stats.get(
-
+            "avg_attempts_per_operation": retry_stats.get(
+                "average_attempts_per_operation", 0
+            ),
+            "is_effective": retry_stats.get("success_rate_percent", 0) > 85,
         }
-
+
     def _assess_connection_stability(self) -> Dict:
         """Assess connection stability."""
         stability_score = 100.0
-
+
         # Penalize pool refreshes
         if self.metrics.pool_refreshes > 0:
             stability_score -= min(self.metrics.pool_refreshes * 15, 60)
-
+
         # Penalize consecutive failures
         if self.metrics.consecutive_failures > 0:
             stability_score -= min(self.metrics.consecutive_failures * 20, 80)
-
+
         return {
             "stability_score": max(0.0, stability_score),
             "pool_refreshes": self.metrics.pool_refreshes,
             "consecutive_failures": self.metrics.consecutive_failures,
-            "is_stable": stability_score > 70
+            "is_stable": stability_score > 70,
         }
-
+
     def _generate_health_recommendations(self) -> List[str]:
         """Generate health recommendations based on current metrics."""
         recommendations = []
-
+
         # High failure rate
         if self.metrics.total_checks > 0:
-            failure_rate = (
+            failure_rate = (
+                self.metrics.failed_checks / self.metrics.total_checks
+            ) * 100
             if failure_rate > 20:
                 recommendations.append(
-
+                    (
+                        f"High failure rate ({failure_rate:.1f}%) - "
+                        f"check database configuration"
+                    )
                 )
-
+
         # High response times
         if self.metrics.avg_response_time_ms > 100:
             recommendations.append(
-
+                (
+                    f"High response times "
+                    f"({self.metrics.avg_response_time_ms:.1f}ms) - "
+                    f"consider optimizing queries"
+                )
             )
-
+
         # Approaching failure threshold
         if self.metrics.consecutive_failures >= self.failure_threshold - 1:
             recommendations.append(
                 "Approaching failure threshold - pool refresh imminent"
             )
-
+
         # Frequent pool refreshes
         if self.metrics.pool_refreshes > 3:
             recommendations.append(
-
+                (
+                    "Frequent pool refreshes detected - investigate "
+                    "underlying connection issues"
+                )
            )
-
+
        # No recent successful checks
-        if (
-
+        if (
+            self.metrics.last_success_time
+            and datetime.utcnow() - self.metrics.last_success_time
+            > timedelta(minutes=5)
+        ):
             recommendations.append(
-
+                (
+                    "No successful health checks in last 5 minutes - "
+                    "database may be unavailable"
+                )
             )
-
+
         if not recommendations:
             recommendations.append("Database health is optimal")
-
+
         return recommendations


 class DatabaseMetricsCollector:
     """Collects and aggregates database performance metrics."""
-
+
     def __init__(self):
         """Initialize metrics collector."""
         self._operation_metrics = {}
         self._locking_events = []
         self._max_events_history = 50
-
+
     def record_operation(
         self,
         operation_name: str,
         duration_ms: float,
         success: bool,
-        connection_pool_size: int
+        connection_pool_size: int,
     ) -> None:
         """
         Record a database operation for metrics.
-
+
         Args:
             operation_name: Name of the database operation
             duration_ms: Operation duration in milliseconds
@@ -540,28 +595,30 @@ class DatabaseMetricsCollector:
                 "failed_operations": 0,
                 "total_duration_ms": 0.0,
                 "avg_duration_ms": 0.0,
-                "min_duration_ms": float(
-                "max_duration_ms": 0.0
+                "min_duration_ms": float("inf"),
+                "max_duration_ms": 0.0,
             }
-
+
         metrics = self._operation_metrics[operation_name]
         metrics["total_operations"] += 1
         metrics["total_duration_ms"] += duration_ms
-
+
         if success:
             metrics["successful_operations"] += 1
         else:
             metrics["failed_operations"] += 1
-
+
         # Update duration statistics
-        metrics["avg_duration_ms"] =
+        metrics["avg_duration_ms"] = (
+            metrics["total_duration_ms"] / metrics["total_operations"]
+        )
         metrics["min_duration_ms"] = min(metrics["min_duration_ms"], duration_ms)
         metrics["max_duration_ms"] = max(metrics["max_duration_ms"], duration_ms)
-
+
     def record_locking_event(self, operation_name: str, error_message: str) -> None:
         """
         Record a database locking event.
-
+
         Args:
             operation_name: Name of the operation that encountered locking
             error_message: Error message from the locking event
@@ -569,57 +626,69 @@ class DatabaseMetricsCollector:
         event = {
             "timestamp": datetime.utcnow().isoformat(),
             "operation_name": operation_name,
-            "error_message": error_message
+            "error_message": error_message[:1000] if error_message else None,
         }
-
+
         self._locking_events.append(event)
-
+
         # Trim history
         if len(self._locking_events) > self._max_events_history:
-            self._locking_events = self._locking_events[-self._max_events_history:]
-
+            self._locking_events = self._locking_events[-self._max_events_history :]
+
     def get_operation_metrics(self) -> Dict:
         """Get aggregated operation metrics."""
         return {
             operation: metrics.copy()
             for operation, metrics in self._operation_metrics.items()
         }
-
+
     def get_locking_frequency(self) -> Dict:
         """Get locking event frequency statistics."""
         if not self._locking_events:
             return {
                 "total_events": 0,
                 "events_last_hour": 0,
-                "most_frequent_operations": []
+                "most_frequent_operations": [],
             }
-
+
         # Count events in last hour
         one_hour_ago = datetime.utcnow() - timedelta(hours=1)
         recent_events = [
-            event
+            event
+            for event in self._locking_events
             if datetime.fromisoformat(event["timestamp"]) > one_hour_ago
         ]
-
+
         # Count by operation
         operation_counts = {}
         for event in self._locking_events:
             op = event["operation_name"]
             operation_counts[op] = operation_counts.get(op, 0) + 1
-
+
         # Sort by frequency
         most_frequent = sorted(
-            operation_counts.items(),
-            key=lambda x: x[1],
-            reverse=True
+            operation_counts.items(), key=lambda x: x[1], reverse=True
        )[:5]
-
+
+        # Truncate error messages to prevent massive responses
+        recent_events_truncated = []
+        for event in self._locking_events[-10:]:  # Last 10 events
+            truncated_event = {
+                "timestamp": event["timestamp"],
+                "operation_name": event["operation_name"],
+                "error_message": (
+                    event["error_message"][:500] + "..."
+                    if len(event["error_message"]) > 500
+                    else event["error_message"]
+                ),
+            }
+            recent_events_truncated.append(truncated_event)
+
         return {
             "total_events": len(self._locking_events),
             "events_last_hour": len(recent_events),
             "most_frequent_operations": [
-                {"operation": op, "count": count}
-                for op, count in most_frequent
+                {"operation": op, "count": count} for op, count in most_frequent
             ],
-            "recent_events":
+            "recent_events": recent_events_truncated,
         }
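Most of the changes above are mechanical reformatting (trailing commas, wrapped f-strings, whitespace normalization) plus a few behavioural additions, such as truncating stored error messages in the metrics collector and dropping the unused `aiosqlite` import. The monitoring API itself is unchanged. The sketch below exercises that API as it appears in the hunks; `FakeDatabaseManager` is a stand-in invented for this example (the real `DatabaseManager` lives in `mcp_code_indexer/database/database.py` and is not shown in this diff), and it only provides the `get_connection()` async context manager and `close_pool()` coroutine that the monitor calls.

```python
# Sketch only: drives the ConnectionHealthMonitor API visible in the diff above.
# Requires Python 3.11+ because check_health uses asyncio.timeout.
import asyncio
from contextlib import asynccontextmanager

from mcp_code_indexer.database.connection_health import ConnectionHealthMonitor


class FakeDatabaseManager:
    """Hypothetical stand-in exposing just what the monitor needs."""

    @asynccontextmanager
    async def get_connection(self):
        class _Cursor:
            async def fetchone(self):
                return (1,)  # mimics a successful "SELECT 1"

        class _Conn:
            async def execute(self, query):
                return _Cursor()

        yield _Conn()

    async def close_pool(self):
        pass  # called when persistent failures trigger a pool refresh


async def main() -> None:
    monitor = ConnectionHealthMonitor(
        FakeDatabaseManager(),
        check_interval=5.0,
        failure_threshold=3,
        timeout_seconds=2.0,
    )
    await monitor.start_monitoring()

    # One explicit check plus the aggregated status dictionary.
    result = await monitor.check_health()
    print(result.is_healthy, f"{result.response_time_ms:.1f}ms")
    print(monitor.get_health_status(include_retry_stats=False)["current_status"])

    await monitor.stop_monitoring()


asyncio.run(main())
```

With the default arguments the monitor checks every 30 seconds and refreshes the pool after three consecutive failures, which matches the `configuration` block reported by `get_health_status()`.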