mcp-code-indexer 3.1.4-py3-none-any.whl → 3.1.5-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they were published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions exactly as they appear in the public registry; a minimal sketch of how such a wheel-to-wheel comparison can be reproduced locally follows the file listing below.
- mcp_code_indexer/__init__.py +8 -6
- mcp_code_indexer/ask_handler.py +105 -75
- mcp_code_indexer/claude_api_handler.py +125 -82
- mcp_code_indexer/cleanup_manager.py +107 -81
- mcp_code_indexer/database/connection_health.py +212 -161
- mcp_code_indexer/database/database.py +529 -415
- mcp_code_indexer/database/exceptions.py +167 -118
- mcp_code_indexer/database/models.py +54 -19
- mcp_code_indexer/database/retry_executor.py +139 -103
- mcp_code_indexer/deepask_handler.py +178 -140
- mcp_code_indexer/error_handler.py +88 -76
- mcp_code_indexer/file_scanner.py +163 -141
- mcp_code_indexer/git_hook_handler.py +352 -261
- mcp_code_indexer/logging_config.py +76 -94
- mcp_code_indexer/main.py +406 -320
- mcp_code_indexer/middleware/error_middleware.py +106 -71
- mcp_code_indexer/query_preprocessor.py +40 -40
- mcp_code_indexer/server/mcp_server.py +785 -470
- mcp_code_indexer/token_counter.py +54 -47
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/METADATA +3 -3
- mcp_code_indexer-3.1.5.dist-info/RECORD +37 -0
- mcp_code_indexer-3.1.4.dist-info/RECORD +0 -37
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/WHEEL +0 -0
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/entry_points.txt +0 -0
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/licenses/LICENSE +0 -0
- {mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/top_level.txt +0 -0
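The per-file line counts above come from a plain text comparison of the two wheels' contents. Below is a minimal sketch of how a similar listing could be reproduced locally with only the standard library; it assumes both wheel files have already been downloaded (for example with `pip download mcp-code-indexer==3.1.4 --no-deps`), that the filenames follow the standard wheel naming scheme, and that the counting logic is illustrative rather than the registry's exact algorithm.

# Sketch: reproduce a per-file "+added -removed" listing for two locally
# downloaded wheels. The wheel filenames and counting rules are assumptions
# for illustration; the registry's own diff algorithm may differ in detail.
import difflib
import zipfile

OLD_WHEEL = "mcp_code_indexer-3.1.4-py3-none-any.whl"
NEW_WHEEL = "mcp_code_indexer-3.1.5-py3-none-any.whl"


def read_wheel(path: str) -> dict:
    """Return {member name: list of text lines} for a wheel (zip archive)."""
    members = {}
    with zipfile.ZipFile(path) as archive:
        for name in archive.namelist():
            try:
                members[name] = archive.read(name).decode("utf-8").splitlines()
            except UnicodeDecodeError:
                members[name] = []  # skip binary members
    return members


old_files = read_wheel(OLD_WHEEL)
new_files = read_wheel(NEW_WHEEL)

for name in sorted(set(old_files) | set(new_files)):
    added = removed = 0
    for line in difflib.unified_diff(
        old_files.get(name, []), new_files.get(name, []), lineterm=""
    ):
        if line.startswith("+") and not line.startswith("+++"):
            added += 1
        elif line.startswith("-") and not line.startswith("---"):
            removed += 1
    if added or removed:
        print(f"{name} +{added} -{removed}")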
mcp_code_indexer/database/connection_health.py

@@ -12,14 +12,13 @@ from dataclasses import dataclass, field
 from datetime import datetime, timedelta
 from typing import Dict, Optional, List

-import aiosqlite
-
 logger = logging.getLogger(__name__)


 @dataclass
 class HealthCheckResult:
     """Result of a database health check."""
+
     is_healthy: bool
     response_time_ms: float
     error_message: Optional[str] = None
@@ -29,6 +28,7 @@ class HealthCheckResult:
 @dataclass
 class ConnectionMetrics:
     """Metrics for database connection monitoring."""
+
     total_checks: int = 0
     successful_checks: int = 0
     failed_checks: int = 0
@@ -42,17 +42,17 @@ class ConnectionMetrics:

 class ConnectionHealthMonitor:
     """Monitors database connection health with periodic checks and metrics."""
-
+
     def __init__(
         self,
         database_manager,
         check_interval: float = 30.0,
         failure_threshold: int = 3,
-        timeout_seconds: float = 5.0
+        timeout_seconds: float = 5.0,
     ):
         """
         Initialize connection health monitor.
-
+
         Args:
             database_manager: DatabaseManager instance to monitor
             check_interval: Health check interval in seconds
@@ -63,39 +63,42 @@ class ConnectionHealthMonitor:
         self.check_interval = check_interval
         self.failure_threshold = failure_threshold
         self.timeout_seconds = timeout_seconds
-
+
         self.metrics = ConnectionMetrics()
         self._monitoring_task: Optional[asyncio.Task] = None
         self._is_monitoring = False
         self._health_history: List[HealthCheckResult] = []
         self._max_history_size = 100
-
+
     async def start_monitoring(self) -> None:
         """Start periodic health monitoring."""
         if self._is_monitoring:
             logger.warning("Health monitoring is already running")
             return
-
+
         self._is_monitoring = True
         self._monitoring_task = asyncio.create_task(self._monitoring_loop())
         logger.info(
-
+            (
+                f"Started database health monitoring with "
+                f"{self.check_interval}s interval"
+            ),
             extra={
                 "structured_data": {
                     "health_monitoring": {
                         "action": "started",
                         "check_interval": self.check_interval,
-                        "failure_threshold": self.failure_threshold
+                        "failure_threshold": self.failure_threshold,
                     }
                 }
-            }
+            },
         )
-
+
     async def stop_monitoring(self) -> None:
         """Stop periodic health monitoring."""
         if not self._is_monitoring:
             return
-
+
         self._is_monitoring = False
         if self._monitoring_task:
             self._monitoring_task.cancel()
@@ -104,45 +107,45 @@ class ConnectionHealthMonitor:
             except asyncio.CancelledError:
                 pass
             self._monitoring_task = None
-
+
         logger.info("Stopped database health monitoring")
-
+
     async def _monitoring_loop(self) -> None:
         """Main monitoring loop that runs periodic health checks."""
         while self._is_monitoring:
             try:
                 # Perform health check
                 health_result = await self.check_health()
-
+
                 # Update metrics
                 self._update_metrics(health_result)
-
+
                 # Store in history
                 self._add_to_history(health_result)
-
+
                 # Check if pool refresh is needed
                 if self.metrics.consecutive_failures >= self.failure_threshold:
                     await self._handle_persistent_failures()
-
+
                 # Log periodic health status
                 if self.metrics.total_checks % 10 == 0:  # Every 10 checks
                     self._log_health_summary()
-
+
             except Exception as e:
                 logger.error(f"Error in health monitoring loop: {e}")
-
+
             # Wait for next check
             await asyncio.sleep(self.check_interval)
-
+
     async def check_health(self) -> HealthCheckResult:
         """
         Perform a single health check on the database.
-
+
         Returns:
             HealthCheckResult with check status and timing
         """
         start_time = time.time()
-
+
         try:
             # Use a timeout for the health check
             async with asyncio.timeout(self.timeout_seconds):
@@ -150,39 +153,38 @@ class ConnectionHealthMonitor:
                     # Simple query to test connectivity
                     cursor = await conn.execute("SELECT 1")
                     result = await cursor.fetchone()
-
+
                     if result and result[0] == 1:
                         response_time = (time.time() - start_time) * 1000
                         return HealthCheckResult(
-                            is_healthy=True,
-                            response_time_ms=response_time
+                            is_healthy=True, response_time_ms=response_time
                         )
                     else:
                         return HealthCheckResult(
                             is_healthy=False,
                             response_time_ms=(time.time() - start_time) * 1000,
-                            error_message="Unexpected query result"
+                            error_message="Unexpected query result",
                         )
-
+
         except asyncio.TimeoutError:
             return HealthCheckResult(
                 is_healthy=False,
                 response_time_ms=(time.time() - start_time) * 1000,
-                error_message=f"Health check timeout after {self.timeout_seconds}s"
+                error_message=(f"Health check timeout after {self.timeout_seconds}s"),
             )
-
+
         except Exception as e:
             return HealthCheckResult(
                 is_healthy=False,
                 response_time_ms=(time.time() - start_time) * 1000,
-                error_message=str(e)
+                error_message=str(e),
             )
-
+
     def _update_metrics(self, health_result: HealthCheckResult) -> None:
         """Update connection metrics based on health check result."""
         self.metrics.total_checks += 1
         self.metrics.last_check_time = health_result.timestamp
-
+
         if health_result.is_healthy:
             self.metrics.successful_checks += 1
             self.metrics.consecutive_failures = 0
@@ -191,64 +193,74 @@ class ConnectionHealthMonitor:
             self.metrics.failed_checks += 1
             self.metrics.consecutive_failures += 1
             self.metrics.last_failure_time = health_result.timestamp
-
+
         # Update average response time
         if self.metrics.total_checks > 0:
             current_avg = self.metrics.avg_response_time_ms
             new_avg = (
-
-
-            )
+                current_avg * (self.metrics.total_checks - 1)
+                + health_result.response_time_ms
+            ) / self.metrics.total_checks
             self.metrics.avg_response_time_ms = new_avg
-
+
     def _add_to_history(self, health_result: HealthCheckResult) -> None:
         """Add health check result to history, maintaining size limit."""
         self._health_history.append(health_result)
-
+
         # Trim history if it exceeds max size
         if len(self._health_history) > self._max_history_size:
-            self._health_history = self._health_history[-self._max_history_size:]
-
+            self._health_history = self._health_history[-self._max_history_size :]
+
     async def _handle_persistent_failures(self) -> None:
         """Handle persistent health check failures by refreshing pool."""
         logger.warning(
-
+            (
+                f"Detected {self.metrics.consecutive_failures} consecutive "
+                f"failures, refreshing connection pool"
+            ),
             extra={
                 "structured_data": {
                     "pool_refresh": {
                         "consecutive_failures": self.metrics.consecutive_failures,
                         "failure_threshold": self.failure_threshold,
-                        "action": "pool_refresh_triggered"
+                        "action": "pool_refresh_triggered",
                     }
                 }
-            }
+            },
         )
-
+
         try:
             # Refresh the connection pool
             await self.database_manager.close_pool()
             self.metrics.pool_refreshes += 1
             self.metrics.consecutive_failures = 0
-
+
             # Perform immediate health check after refresh
             health_result = await self.check_health()
             if health_result.is_healthy:
                 logger.info("Connection pool refresh successful, health check passed")
             else:
-                logger.error(
-
+                logger.error(
+                    f"Connection pool refresh failed, health check error: "
+                    f"{health_result.error_message}"
+                )
+
         except Exception as e:
             logger.error(f"Failed to refresh connection pool: {e}")
-
+
     def _log_health_summary(self) -> None:
         """Log a summary of health monitoring statistics."""
         success_rate = (
             (self.metrics.successful_checks / self.metrics.total_checks * 100)
-            if self.metrics.total_checks > 0
+            if self.metrics.total_checks > 0
+            else 0
         )
-
+
         logger.info(
-
+            (
+                f"Health monitoring summary: {success_rate:.1f}% success rate "
+                f"over {self.metrics.total_checks} checks"
+            ),
             extra={
                 "structured_data": {
                     "health_summary": {
@@ -256,37 +268,38 @@ class ConnectionHealthMonitor:
                         "success_rate_percent": success_rate,
                         "avg_response_time_ms": self.metrics.avg_response_time_ms,
                         "consecutive_failures": self.metrics.consecutive_failures,
-                        "pool_refreshes": self.metrics.pool_refreshes
+                        "pool_refreshes": self.metrics.pool_refreshes,
                     }
                 }
-            }
+            },
         )
-
+
     def get_health_status(self, include_retry_stats: bool = True) -> Dict:
         """
         Get current health status and metrics.
-
+
         Args:
             include_retry_stats: Whether to include retry executor statistics
-
+
         Returns:
             Dictionary with health status, metrics, recent history, and retry stats
         """
         # Get recent health status (last 5 checks)
         recent_checks = self._health_history[-5:] if self._health_history else []
         recent_success_rate = (
-            sum(1 for check in recent_checks if check.is_healthy)
-
+            sum(1 for check in recent_checks if check.is_healthy)
+            / len(recent_checks)
+            * 100
+            if recent_checks
+            else 0
         )
-
+
         health_status = {
             "is_monitoring": self._is_monitoring,
             "current_status": {
-                "is_healthy": (
-                    recent_checks[-1].is_healthy if recent_checks else True
-                ),
+                "is_healthy": (recent_checks[-1].is_healthy if recent_checks else True),
                 "consecutive_failures": self.metrics.consecutive_failures,
-                "recent_success_rate_percent": recent_success_rate
+                "recent_success_rate_percent": recent_success_rate,
             },
             "metrics": {
                 "total_checks": self.metrics.total_checks,
@@ -294,40 +307,52 @@ class ConnectionHealthMonitor:
                 "failed_checks": self.metrics.failed_checks,
                 "avg_response_time_ms": self.metrics.avg_response_time_ms,
                 "pool_refreshes": self.metrics.pool_refreshes,
-                "last_check_time":
-
-
+                "last_check_time": (
+                    self.metrics.last_check_time.isoformat()
+                    if self.metrics.last_check_time
+                    else None
+                ),
+                "last_success_time": (
+                    self.metrics.last_success_time.isoformat()
+                    if self.metrics.last_success_time
+                    else None
+                ),
+                "last_failure_time": (
+                    self.metrics.last_failure_time.isoformat()
+                    if self.metrics.last_failure_time
+                    else None
+                ),
             },
             "configuration": {
                 "check_interval": self.check_interval,
                 "failure_threshold": self.failure_threshold,
-                "timeout_seconds": self.timeout_seconds
-            }
+                "timeout_seconds": self.timeout_seconds,
+            },
         }
-
+
         # Include retry executor statistics if available
-        if include_retry_stats and hasattr(self.database_manager,
+        if include_retry_stats and hasattr(self.database_manager, "_retry_executor"):
             retry_executor = self.database_manager._retry_executor
             if retry_executor:
                 health_status["retry_statistics"] = retry_executor.get_retry_stats()
-
+
         # Include database-level statistics if available
-        if hasattr(self.database_manager,
+        if hasattr(self.database_manager, "get_database_stats"):
             try:
                 db_stats = self.database_manager.get_database_stats()
                 health_status["database_statistics"] = db_stats
             except Exception as e:
                 logger.warning(f"Failed to get database statistics: {e}")
-
+
         return health_status
-
+
     def get_recent_history(self, count: int = 10) -> List[Dict]:
         """
         Get recent health check history.
-
+
         Args:
             count: Number of recent checks to return
-
+
         Returns:
             List of health check results as dictionaries
         """
@@ -337,70 +362,74 @@ class ConnectionHealthMonitor:
                 "timestamp": check.timestamp.isoformat(),
                 "is_healthy": check.is_healthy,
                 "response_time_ms": check.response_time_ms,
-                "error_message": check.error_message
+                "error_message": check.error_message,
             }
             for check in recent_checks
         ]
-
+
     def get_comprehensive_diagnostics(self) -> Dict:
         """
         Get comprehensive database health diagnostics for monitoring.
-
-        This method provides detailed diagnostics suitable for the
+
+        This method provides detailed diagnostics suitable for the
         check_database_health MCP tool.
-
+
         Returns:
-            Comprehensive health diagnostics including retry metrics,
+            Comprehensive health diagnostics including retry metrics,
             performance data, and resilience statistics
         """
         # Get base health status with retry stats
         base_status = self.get_health_status(include_retry_stats=True)
-
+
         # Add detailed performance analysis
         diagnostics = {
             **base_status,
             "performance_analysis": {
                 "health_check_performance": {
                     "avg_response_time_ms": self.metrics.avg_response_time_ms,
-                    "response_time_threshold_exceeded":
-
+                    "response_time_threshold_exceeded": (
+                        self.metrics.avg_response_time_ms > 100
+                    ),
+                    "recent_performance_trend": self._get_performance_trend(),
                 },
                 "failure_analysis": {
                     "failure_rate_percent": (
                         (self.metrics.failed_checks / self.metrics.total_checks * 100)
-                        if self.metrics.total_checks > 0
+                        if self.metrics.total_checks > 0
+                        else 0
                     ),
                     "consecutive_failures": self.metrics.consecutive_failures,
                     "approaching_failure_threshold": (
                         self.metrics.consecutive_failures >= self.failure_threshold - 1
                     ),
-                    "pool_refresh_frequency": self.metrics.pool_refreshes
-                }
+                    "pool_refresh_frequency": self.metrics.pool_refreshes,
+                },
             },
             "resilience_indicators": {
                 "overall_health_score": self._calculate_health_score(),
                 "retry_effectiveness": self._analyze_retry_effectiveness(),
                 "connection_stability": self._assess_connection_stability(),
-                "recommendations": self._generate_health_recommendations()
+                "recommendations": self._generate_health_recommendations(),
             },
-            "recent_history": self.get_recent_history(count=5)
+            "recent_history": self.get_recent_history(count=5),
         }
-
+
         return diagnostics
-
+
     def _get_performance_trend(self) -> str:
         """Analyze recent performance trend."""
         if len(self._health_history) < 5:
             return "insufficient_data"
-
+
         recent_times = [
-            check.response_time_ms
+            check.response_time_ms
+            for check in self._health_history[-5:]
            if check.is_healthy
        ]
-
+
        if len(recent_times) < 2:
            return "insufficient_healthy_checks"
-
+
        # Simple trend analysis
        if recent_times[-1] > recent_times[0] * 1.5:
            return "degrading"
@@ -408,125 +437,147 @@ class ConnectionHealthMonitor:
             return "improving"
         else:
             return "stable"
-
+
     def _calculate_health_score(self) -> float:
         """Calculate overall health score (0-100)."""
         if self.metrics.total_checks == 0:
             return 100.0
-
+
         # Base score from success rate
-        success_rate = (
-
+        success_rate = (
+            self.metrics.successful_checks / self.metrics.total_checks
+        ) * 100
+
         # Penalize consecutive failures
         failure_penalty = min(self.metrics.consecutive_failures * 10, 50)
-
+
         # Penalize high response times
         response_penalty = min(max(0, self.metrics.avg_response_time_ms - 50) / 10, 20)
-
+
         # Calculate final score
         score = success_rate - failure_penalty - response_penalty
         return max(0.0, min(100.0, score))
-
+
     def _analyze_retry_effectiveness(self) -> Dict:
         """Analyze retry mechanism effectiveness."""
-        if not hasattr(self.database_manager,
+        if not hasattr(self.database_manager, "_retry_executor"):
             return {"status": "no_retry_executor"}
-
+
         retry_executor = self.database_manager._retry_executor
         if not retry_executor:
             return {"status": "retry_executor_not_initialized"}
-
+
         retry_stats = retry_executor.get_retry_stats()
-
+
         return {
             "status": "active",
             "effectiveness_score": retry_stats.get("success_rate_percent", 0),
             "retry_frequency": retry_stats.get("retry_rate_percent", 0),
-            "avg_attempts_per_operation": retry_stats.get(
-
+            "avg_attempts_per_operation": retry_stats.get(
+                "average_attempts_per_operation", 0
+            ),
+            "is_effective": retry_stats.get("success_rate_percent", 0) > 85,
         }
-
+
     def _assess_connection_stability(self) -> Dict:
         """Assess connection stability."""
         stability_score = 100.0
-
+
         # Penalize pool refreshes
         if self.metrics.pool_refreshes > 0:
             stability_score -= min(self.metrics.pool_refreshes * 15, 60)
-
+
         # Penalize consecutive failures
         if self.metrics.consecutive_failures > 0:
             stability_score -= min(self.metrics.consecutive_failures * 20, 80)
-
+
         return {
             "stability_score": max(0.0, stability_score),
             "pool_refreshes": self.metrics.pool_refreshes,
             "consecutive_failures": self.metrics.consecutive_failures,
-            "is_stable": stability_score > 70
+            "is_stable": stability_score > 70,
         }
-
+
     def _generate_health_recommendations(self) -> List[str]:
         """Generate health recommendations based on current metrics."""
         recommendations = []
-
+
         # High failure rate
         if self.metrics.total_checks > 0:
-            failure_rate = (
+            failure_rate = (
+                self.metrics.failed_checks / self.metrics.total_checks
+            ) * 100
             if failure_rate > 20:
                 recommendations.append(
-
+                    (
+                        f"High failure rate ({failure_rate:.1f}%) - "
+                        f"check database configuration"
+                    )
                 )
-
+
         # High response times
         if self.metrics.avg_response_time_ms > 100:
             recommendations.append(
-
+                (
+                    f"High response times "
+                    f"({self.metrics.avg_response_time_ms:.1f}ms) - "
+                    f"consider optimizing queries"
+                )
             )
-
+
         # Approaching failure threshold
         if self.metrics.consecutive_failures >= self.failure_threshold - 1:
             recommendations.append(
                 "Approaching failure threshold - pool refresh imminent"
             )
-
+
         # Frequent pool refreshes
         if self.metrics.pool_refreshes > 3:
             recommendations.append(
-
+                (
+                    "Frequent pool refreshes detected - investigate "
+                    "underlying connection issues"
+                )
             )
-
+
         # No recent successful checks
-        if (
-
+        if (
+            self.metrics.last_success_time
+            and datetime.utcnow() - self.metrics.last_success_time
+            > timedelta(minutes=5)
+        ):
             recommendations.append(
-
+                (
+                    "No successful health checks in last 5 minutes - "
+                    "database may be unavailable"
+                )
             )
-
+
         if not recommendations:
             recommendations.append("Database health is optimal")
-
+
         return recommendations


 class DatabaseMetricsCollector:
     """Collects and aggregates database performance metrics."""
-
+
     def __init__(self):
         """Initialize metrics collector."""
         self._operation_metrics = {}
         self._locking_events = []
         self._max_events_history = 50
-
+
     def record_operation(
         self,
         operation_name: str,
         duration_ms: float,
         success: bool,
-        connection_pool_size: int
+        connection_pool_size: int,
     ) -> None:
         """
         Record a database operation for metrics.
-
+
         Args:
             operation_name: Name of the database operation
             duration_ms: Operation duration in milliseconds
@@ -540,28 +591,30 @@ class DatabaseMetricsCollector:
                 "failed_operations": 0,
                 "total_duration_ms": 0.0,
                 "avg_duration_ms": 0.0,
-                "min_duration_ms": float(
-                "max_duration_ms": 0.0
+                "min_duration_ms": float("inf"),
+                "max_duration_ms": 0.0,
             }
-
+
         metrics = self._operation_metrics[operation_name]
         metrics["total_operations"] += 1
         metrics["total_duration_ms"] += duration_ms
-
+
         if success:
             metrics["successful_operations"] += 1
         else:
             metrics["failed_operations"] += 1
-
+
         # Update duration statistics
-        metrics["avg_duration_ms"] =
+        metrics["avg_duration_ms"] = (
+            metrics["total_duration_ms"] / metrics["total_operations"]
+        )
         metrics["min_duration_ms"] = min(metrics["min_duration_ms"], duration_ms)
         metrics["max_duration_ms"] = max(metrics["max_duration_ms"], duration_ms)
-
+
     def record_locking_event(self, operation_name: str, error_message: str) -> None:
         """
         Record a database locking event.
-
+
         Args:
             operation_name: Name of the operation that encountered locking
             error_message: Error message from the locking event
@@ -569,57 +622,55 @@ class DatabaseMetricsCollector:
         event = {
             "timestamp": datetime.utcnow().isoformat(),
             "operation_name": operation_name,
-            "error_message": error_message
+            "error_message": error_message,
         }
-
+
         self._locking_events.append(event)
-
+
         # Trim history
         if len(self._locking_events) > self._max_events_history:
-            self._locking_events = self._locking_events[-self._max_events_history:]
-
+            self._locking_events = self._locking_events[-self._max_events_history :]
+
     def get_operation_metrics(self) -> Dict:
         """Get aggregated operation metrics."""
         return {
             operation: metrics.copy()
             for operation, metrics in self._operation_metrics.items()
         }
-
+
     def get_locking_frequency(self) -> Dict:
         """Get locking event frequency statistics."""
         if not self._locking_events:
             return {
                 "total_events": 0,
                 "events_last_hour": 0,
-                "most_frequent_operations": []
+                "most_frequent_operations": [],
             }
-
+
         # Count events in last hour
         one_hour_ago = datetime.utcnow() - timedelta(hours=1)
         recent_events = [
-            event
+            event
+            for event in self._locking_events
             if datetime.fromisoformat(event["timestamp"]) > one_hour_ago
         ]
-
+
         # Count by operation
         operation_counts = {}
         for event in self._locking_events:
             op = event["operation_name"]
             operation_counts[op] = operation_counts.get(op, 0) + 1
-
+
         # Sort by frequency
         most_frequent = sorted(
-            operation_counts.items(),
-            key=lambda x: x[1],
-            reverse=True
+            operation_counts.items(), key=lambda x: x[1], reverse=True
         )[:5]
-
+
         return {
             "total_events": len(self._locking_events),
             "events_last_hour": len(recent_events),
             "most_frequent_operations": [
-                {"operation": op, "count": count}
-                for op, count in most_frequent
+                {"operation": op, "count": count} for op, count in most_frequent
             ],
-            "recent_events": self._locking_events[-10:]  # Last 10 events
+            "recent_events": self._locking_events[-10:],  # Last 10 events
         }
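Within this file the 3.1.4 → 3.1.5 changes appear to be mechanical re-formatting (trailing commas, wrapped f-strings, split expressions, whitespace) plus the removal of the unused `import aiosqlite`. For orientation, a minimal usage sketch of the monitor follows; the stub manager, the in-memory SQLite database, and the direct aiosqlite import are assumptions for illustration only, not the package's real wiring around its DatabaseManager.

# Sketch: driving the ConnectionHealthMonitor shown above against a stub
# database manager. The stub and the aiosqlite dependency are illustrative
# assumptions. check_health() uses asyncio.timeout, so Python 3.11+ is assumed.
import asyncio
from contextlib import asynccontextmanager

import aiosqlite

from mcp_code_indexer.database.connection_health import ConnectionHealthMonitor


class StubDatabaseManager:
    """Minimal stand-in exposing the two hooks the monitor calls."""

    def __init__(self, path: str = ":memory:") -> None:
        self._path = path

    @asynccontextmanager
    async def get_connection(self):
        # One short-lived connection per health check; no real pooling here.
        conn = await aiosqlite.connect(self._path)
        try:
            yield conn
        finally:
            await conn.close()

    async def close_pool(self) -> None:
        # Called by _handle_persistent_failures(); nothing is pooled in this stub.
        pass


async def main() -> None:
    monitor = ConnectionHealthMonitor(
        StubDatabaseManager(), check_interval=1.0, failure_threshold=3
    )
    await monitor.start_monitoring()
    await asyncio.sleep(3)  # let a few periodic checks run
    print(monitor.get_health_status(include_retry_stats=False))
    await monitor.stop_monitoring()


asyncio.run(main())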