kailash 0.8.5__py3-none-any.whl → 0.8.6__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- kailash/__init__.py +5 -5
- kailash/channels/__init__.py +2 -1
- kailash/channels/mcp_channel.py +23 -4
- kailash/cli/validate_imports.py +202 -0
- kailash/core/resilience/bulkhead.py +15 -5
- kailash/core/resilience/circuit_breaker.py +4 -1
- kailash/core/resilience/health_monitor.py +312 -84
- kailash/edge/migration/edge_migration_service.py +384 -0
- kailash/mcp_server/server.py +351 -8
- kailash/mcp_server/transports.py +305 -0
- kailash/middleware/gateway/event_store.py +1 -0
- kailash/nodes/base.py +77 -1
- kailash/nodes/code/python.py +44 -3
- kailash/nodes/data/async_sql.py +42 -20
- kailash/nodes/edge/edge_migration_node.py +16 -12
- kailash/nodes/governance.py +410 -0
- kailash/nodes/rag/registry.py +1 -1
- kailash/nodes/transaction/distributed_transaction_manager.py +48 -1
- kailash/nodes/transaction/saga_state_storage.py +2 -1
- kailash/nodes/validation.py +8 -8
- kailash/runtime/local.py +30 -0
- kailash/runtime/validation/__init__.py +7 -15
- kailash/runtime/validation/import_validator.py +446 -0
- kailash/runtime/validation/suggestion_engine.py +5 -5
- kailash/utils/data_paths.py +74 -0
- kailash/workflow/builder.py +183 -4
- kailash/workflow/mermaid_visualizer.py +3 -1
- kailash/workflow/templates.py +6 -6
- kailash/workflow/validation.py +134 -3
- {kailash-0.8.5.dist-info → kailash-0.8.6.dist-info}/METADATA +19 -17
- {kailash-0.8.5.dist-info → kailash-0.8.6.dist-info}/RECORD +35 -30
- {kailash-0.8.5.dist-info → kailash-0.8.6.dist-info}/WHEEL +0 -0
- {kailash-0.8.5.dist-info → kailash-0.8.6.dist-info}/entry_points.txt +0 -0
- {kailash-0.8.5.dist-info → kailash-0.8.6.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.8.5.dist-info → kailash-0.8.6.dist-info}/top_level.txt +0 -0
kailash/core/resilience/health_monitor.py

@@ -64,10 +64,33 @@ class HealthCheckResult:
     error_message: Optional[str] = None
     is_healthy: bool = field(init=False)
 
+    # Additional attributes for compatibility
+    check_name: str = field(default="", init=False)
+    message: str = field(default="", init=False)
+    error: Optional[str] = field(default=None, init=False)
+    metadata: Dict[str, Any] = field(default_factory=dict, init=False)
+
     def __post_init__(self):
-        """Calculate health status."""
+        """Calculate health status and initialize compatibility fields."""
         self.is_healthy = self.status in [HealthStatus.HEALTHY, HealthStatus.DEGRADED]
 
+        # Initialize compatibility fields
+        self.check_name = self.service_name
+        self.error = self.error_message
+        self.metadata = self.details.copy()
+
+        # Set message based on status
+        if self.status == HealthStatus.HEALTHY:
+            self.message = "Service is healthy"
+        elif self.status == HealthStatus.DEGRADED:
+            self.message = "Service is degraded but functional"
+        elif self.status == HealthStatus.UNHEALTHY:
+            self.message = (
+                f"Service is unhealthy: {self.error_message or 'Unknown error'}"
+            )
+        else:
+            self.message = "Service status unknown"
+
 
 @dataclass
 class HealthMetrics:
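The new compatibility attributes on `HealthCheckResult` are all derived in `__post_init__`, so callers keep constructing results from the primary fields only. A minimal sketch of the resulting behavior, assuming the keyword arguments used elsewhere in this diff (`check_id`, `service_name`, `status`, `response_time_ms`, `details`) are the only required ones and that the class is importable from `kailash.core.resilience.health_monitor`:

```python
# Sketch under the assumptions stated above; not taken verbatim from the package.
from kailash.core.resilience.health_monitor import HealthCheckResult, HealthStatus

result = HealthCheckResult(
    check_id="abc-123",
    service_name="orders-db",
    status=HealthStatus.DEGRADED,
    response_time_ms=42.0,
    details={"query_executed": True},
)

# __post_init__ fills the compatibility aliases added in this release:
assert result.is_healthy                             # DEGRADED still counts as healthy
assert result.check_name == "orders-db"              # mirrors service_name
assert result.metadata == {"query_executed": True}   # copy of details
print(result.message)  # "Service is degraded but functional"
```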
@@ -123,10 +146,20 @@ class HealthCheck(ABC):
 class DatabaseHealthCheck(HealthCheck):
     """Health check for database connections."""
 
-    def __init__(self, name: str,
+    def __init__(self, name: str, database_node_or_connection_string, **kwargs):
         """Initialize database health check."""
         super().__init__(name, **kwargs)
-        self.
+        self.check_name = name # Required by HealthCheckManager
+
+        # Handle both database node objects and connection strings
+        if hasattr(database_node_or_connection_string, "execute"):
+            # It's a database node object
+            self.database_node = database_node_or_connection_string
+            self.connection_string = None
+        else:
+            # It's a connection string
+            self.connection_string = database_node_or_connection_string
+            self.database_node = None
 
     async def check_health(self) -> HealthCheckResult:
         """Check database health."""
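The constructor now accepts either an async database node or a plain connection string, distinguishing them by the presence of an `execute` attribute. A hedged sketch of the two paths (the stub node below is hypothetical; only the `DatabaseHealthCheck` behavior comes from the diff):

```python
# Sketch only: illustrates the two construction paths shown above.
from kailash.core.resilience.health_monitor import DatabaseHealthCheck


class StubAsyncDatabaseNode:  # hypothetical stand-in for a kailash SQL node
    async def execute(self, query: str, result_format: str) -> dict:
        return {"success": True, "data": [{"health_check": 1}]}


# Path 1: an object with an `execute` attribute is treated as a database node.
node_check = DatabaseHealthCheck("orders-db", StubAsyncDatabaseNode())
assert node_check.database_node is not None and node_check.connection_string is None

# Path 2: anything else (e.g. a DSN string) is treated as a connection string.
dsn_check = DatabaseHealthCheck("orders-db", "postgresql://localhost/orders")
assert dsn_check.connection_string is not None and dsn_check.database_node is None
```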
@@ -134,41 +167,72 @@ class DatabaseHealthCheck(HealthCheck):
         check_id = str(uuid4())
 
         try:
-
-
-
-
+            if self.database_node:
+                # Use database node object directly
+                result = await self.database_node.execute(
+                    "SELECT 1 as health_check", "dict"
+                )
 
-
-
-
-
-
+                response_time = (time.time() - start_time) * 1000
+
+                if result and result.get("success"):
+                    return HealthCheckResult(
+                        check_id=check_id,
+                        service_name=self.name,
+                        status=HealthStatus.HEALTHY,
+                        response_time_ms=response_time,
+                        details={
+                            "query_executed": True,
+                            "query_result": result.get("data", []),
+                        },
+                    )
+                else:
+                    return HealthCheckResult(
+                        check_id=check_id,
+                        service_name=self.name,
+                        status=HealthStatus.DEGRADED,
+                        response_time_ms=response_time,
+                        details={"query_executed": True, "query_result": []},
+                        error_message="Query returned no success result",
+                    )
+            else:
+                # Use connection string with SQL node
+                from src.kailash.nodes.data.sql import SQLDatabaseNode
 
-
+                sql_node = SQLDatabaseNode(connection_string=self.connection_string)
 
-
-
-
-
-
-
-                    details={
-                        "query_executed": True,
-                        "rows_returned": len(result["data"]),
-                        "execution_time": result.get("execution_time", 0),
-                    },
-                )
-            else:
-                return HealthCheckResult(
-                    check_id=check_id,
-                    service_name=self.name,
-                    status=HealthStatus.DEGRADED,
-                    response_time_ms=response_time,
-                    details={"query_executed": True, "rows_returned": 0},
-                    error_message="Query returned no data",
+                # Execute simple health check query
+                result = await asyncio.wait_for(
+                    asyncio.to_thread(
+                        sql_node.execute, query="SELECT 1 as health_check"
+                    ),
+                    timeout=self.timeout,
                 )
 
+                response_time = (time.time() - start_time) * 1000
+
+                if "data" in result and len(result["data"]) > 0:
+                    return HealthCheckResult(
+                        check_id=check_id,
+                        service_name=self.name,
+                        status=HealthStatus.HEALTHY,
+                        response_time_ms=response_time,
+                        details={
+                            "query_executed": True,
+                            "rows_returned": len(result["data"]),
+                            "execution_time": result.get("execution_time", 0),
+                        },
+                    )
+                else:
+                    return HealthCheckResult(
+                        check_id=check_id,
+                        service_name=self.name,
+                        status=HealthStatus.DEGRADED,
+                        response_time_ms=response_time,
+                        details={"query_executed": True, "rows_returned": 0},
+                        error_message="Query returned no data",
+                    )
+
         except asyncio.TimeoutError:
             response_time = (time.time() - start_time) * 1000
             return HealthCheckResult(
@@ -196,6 +260,7 @@ class RedisHealthCheck(HealthCheck):
         """Initialize Redis health check."""
         super().__init__(name, **kwargs)
         self.redis_config = redis_config
+        self.check_name = name # Required by HealthCheckManager
 
     async def check_health(self) -> HealthCheckResult:
         """Check Redis health."""
@@ -249,6 +314,148 @@ class RedisHealthCheck(HealthCheck):
             )
 
 
+class MemoryHealthCheck(HealthCheck):
+    """Health check for system memory usage."""
+
+    def __init__(
+        self,
+        name: str,
+        warning_threshold: float = 80.0,
+        critical_threshold: float = 95.0,
+        **kwargs,
+    ):
+        """Initialize memory health check."""
+        super().__init__(name, **kwargs)
+        self.warning_threshold = warning_threshold
+        self.critical_threshold = critical_threshold
+        self.check_name = name # Required by HealthCheckManager
+
+    async def check_health(self) -> HealthCheckResult:
+        """Check system memory health."""
+        start_time = time.time()
+        check_id = str(uuid4())
+
+        try:
+            import psutil
+
+            memory = psutil.virtual_memory()
+            response_time = (time.time() - start_time) * 1000
+
+            # Determine status based on memory usage
+            if memory.percent >= self.critical_threshold:
+                status = HealthStatus.UNHEALTHY
+                message = f"Critical memory usage: {memory.percent:.1f}%"
+            elif memory.percent >= self.warning_threshold:
+                status = HealthStatus.DEGRADED
+                message = f"High memory usage: {memory.percent:.1f}%"
+            else:
+                status = HealthStatus.HEALTHY
+                message = f"Memory usage normal: {memory.percent:.1f}%"
+
+            return HealthCheckResult(
+                check_id=check_id,
+                service_name=self.name,
+                status=status,
+                response_time_ms=response_time,
+                details={
+                    "memory_percent": memory.percent,
+                    "total_memory": memory.total,
+                    "available_memory": memory.available,
+                    "used_memory": memory.used,
+                },
+                error_message=message if status != HealthStatus.HEALTHY else None,
+            )
+
+        except Exception as e:
+            response_time = (time.time() - start_time) * 1000
+            return HealthCheckResult(
+                check_id=check_id,
+                service_name=self.name,
+                status=HealthStatus.UNHEALTHY,
+                response_time_ms=response_time,
+                error_message=str(e),
+            )
+
+
+class CustomHealthCheck(HealthCheck):
+    """Custom health check that executes user-defined check function."""
+
+    def __init__(self, name: str, check_function: Callable, **kwargs):
+        """Initialize custom health check."""
+        super().__init__(name, **kwargs)
+        self.check_function = check_function
+        self.check_name = name # Required by HealthCheckManager
+
+    async def check_health(self) -> HealthCheckResult:
+        """Execute custom health check function."""
+        start_time = time.time()
+        check_id = str(uuid4())
+
+        try:
+            # Execute the custom check function
+            if asyncio.iscoroutinefunction(self.check_function):
+                result = await asyncio.wait_for(
+                    self.check_function(), timeout=self.timeout
+                )
+            else:
+                result = await asyncio.wait_for(
+                    asyncio.to_thread(self.check_function), timeout=self.timeout
+                )
+
+            response_time = (time.time() - start_time) * 1000
+
+            # Handle different result formats
+            if isinstance(result, bool):
+                status = HealthStatus.HEALTHY if result else HealthStatus.UNHEALTHY
+                message = "Check passed" if result else "Check failed"
+                details = {"result": result}
+            elif isinstance(result, dict):
+                # Expect dict with status, message, metadata
+                status_str = result.get("status", "healthy").lower()
+                if status_str == "healthy":
+                    status = HealthStatus.HEALTHY
+                elif status_str == "degraded":
+                    status = HealthStatus.DEGRADED
+                else:
+                    status = HealthStatus.UNHEALTHY
+
+                message = result.get("message", "Custom check completed")
+                details = result.get("metadata", {})
+            else:
+                # Assume success if we get any non-false result
+                status = HealthStatus.HEALTHY
+                message = "Custom check completed"
+                details = {"result": str(result)}
+
+            return HealthCheckResult(
+                check_id=check_id,
+                service_name=self.name,
+                status=status,
+                response_time_ms=response_time,
+                details=details,
+                error_message=None if status == HealthStatus.HEALTHY else message,
+            )
+
+        except asyncio.TimeoutError:
+            response_time = (time.time() - start_time) * 1000
+            return HealthCheckResult(
+                check_id=check_id,
+                service_name=self.name,
+                status=HealthStatus.UNHEALTHY,
+                response_time_ms=response_time,
+                error_message=f"Custom health check timeout after {self.timeout}s",
+            )
+        except Exception as e:
+            response_time = (time.time() - start_time) * 1000
+            return HealthCheckResult(
+                check_id=check_id,
+                service_name=self.name,
+                status=HealthStatus.UNHEALTHY,
+                response_time_ms=response_time,
+                error_message=str(e),
+            )
+
+
 class HTTPHealthCheck(HealthCheck):
     """Health check for HTTP endpoints."""
 
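`CustomHealthCheck` accepts either a sync or an async callable and maps three result shapes onto `HealthStatus`: a bool, a dict with `status`/`message`/`metadata` keys, or any other truthy value. A short usage sketch, assuming the `HealthCheck` base class supplies a default `timeout` and that `psutil` is installed for the memory check; the names and numbers below are illustrative, not from the package:

```python
import asyncio

# Import path assumed from the file listing above.
from kailash.core.resilience.health_monitor import CustomHealthCheck, MemoryHealthCheck


async def queue_depth_check() -> dict:
    depth = 12  # pretend this was polled from a broker
    return {
        "status": "healthy" if depth < 100 else "degraded",
        "message": f"queue depth {depth}",
        "metadata": {"depth": depth},
    }


async def main() -> None:
    checks = [
        CustomHealthCheck("queue-depth", queue_depth_check),
        MemoryHealthCheck("memory", warning_threshold=80.0, critical_threshold=95.0),
    ]
    for check in checks:
        result = await check.check_health()
        print(check.check_name, result.status, result.message)


asyncio.run(main())
```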
@@ -257,6 +464,7 @@ class HTTPHealthCheck(HealthCheck):
         super().__init__(name, **kwargs)
         self.url = url
         self.expected_status = expected_status
+        self.check_name = name # Required by HealthCheckManager
 
     async def check_health(self) -> HealthCheckResult:
         """Check HTTP endpoint health."""
@@ -581,7 +789,7 @@ async def quick_health_check(service_name: str) -> bool:
 @dataclass
 class HealthSummary:
     """Health summary for all checks."""
-
+
     total_checks: int
     healthy_checks: int
     degraded_checks: int
@@ -592,7 +800,7 @@ class HealthSummary:
 
 class HealthCheckManager:
     """Manager for orchestrating multiple health checks with configuration."""
-
+
     def __init__(self, config: Dict[str, Any]):
         """Initialize health check manager with configuration."""
         self.config = config
@@ -600,77 +808,77 @@ class HealthCheckManager:
         self.default_interval = config.get("default_interval", 30.0)
         self.parallel_checks = config.get("parallel_checks", True)
         self.max_concurrent_checks = config.get("max_concurrent_checks", 10)
-
+
         self.health_checks: Dict[str, HealthCheck] = {}
         self.check_intervals: Dict[str, float] = {}
         self.last_results: Dict[str, HealthCheckResult] = {}
         self.history: Dict[str, List[HealthCheckResult]] = {}
         self.status_change_callbacks: List[Callable] = []
         self._running = False
-
+
     def register_health_check(self, health_check: HealthCheck, interval: float = None):
         """Register a health check with optional interval."""
         check_name = health_check.check_name
         self.health_checks[check_name] = health_check
         self.check_intervals[check_name] = interval or self.default_interval
         self.history[check_name] = []
-
+
     async def run_health_check(self, check_name: str) -> HealthCheckResult:
         """Run a specific health check."""
        if check_name not in self.health_checks:
            raise ValueError(f"Health check '{check_name}' not found")
-
+
        health_check = self.health_checks[check_name]
        result = await health_check.check_health()
-
+
+        # Check for status changes before storing new result
+        await self._check_status_change(check_name, result)
+
        # Store result
        self.last_results[check_name] = result
        self.history[check_name].append(result)
-
-        # Check for status changes
-        await self._check_status_change(check_name, result)
-
+
        return result
-
+
     async def run_all_health_checks(self) -> List[HealthCheckResult]:
         """Run all registered health checks."""
         if not self.health_checks:
             return []
-
+
         if self.parallel_checks:
             # Run checks in parallel
             tasks = [
-                self.run_health_check(check_name)
+                self.run_health_check(check_name)
                 for check_name in self.health_checks.keys()
             ]
-
+
             # Limit concurrency
             semaphore = asyncio.Semaphore(self.max_concurrent_checks)
-
+
             async def run_with_semaphore(task):
                 async with semaphore:
                     return await task
-
-            results = await asyncio.gather(
-                run_with_semaphore(task) for task in tasks
-
+
+            results = await asyncio.gather(
+                *[run_with_semaphore(task) for task in tasks]
+            )
         else:
             # Run checks sequentially
             results = []
             for check_name in self.health_checks.keys():
                 result = await self.run_health_check(check_name)
                 results.append(result)
-
+
         return results
-
+
     async def get_health_summary(self) -> HealthSummary:
         """Get summary of all health checks."""
         results = await self.run_all_health_checks()
-
+
         healthy_count = sum(1 for r in results if r.status == HealthStatus.HEALTHY)
-        degraded_count = sum(1 for r in results if r.status == HealthStatus.DEGRADED)
+        degraded_count = sum(1 for r in results if r.status == HealthStatus.DEGRADED)
         unhealthy_count = sum(1 for r in results if r.status == HealthStatus.UNHEALTHY)
-
+
         # Determine overall status
         if unhealthy_count > 0:
             overall_status = HealthStatus.UNHEALTHY
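The change inside `run_all_health_checks` is more than formatting: `asyncio.gather` takes awaitables as separate positional arguments, so the old call that passed a single generator expression would raise a `TypeError` rather than run the checks; the new call unpacks a list. A standalone illustration of the corrected pattern (generic names, not the SDK's):

```python
import asyncio


async def fake_check(i: int) -> int:
    await asyncio.sleep(0)  # stand-in for an awaited health check
    return i


async def main() -> None:
    semaphore = asyncio.Semaphore(2)  # cap concurrency, as the manager does

    async def run_with_semaphore(coro):
        async with semaphore:
            return await coro

    tasks = [fake_check(i) for i in range(5)]
    # gather(*iterable): each awaitable becomes its own positional argument.
    results = await asyncio.gather(*[run_with_semaphore(t) for t in tasks])
    print(results)  # [0, 1, 2, 3, 4]


asyncio.run(main())
```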
@@ -680,27 +888,29 @@ class HealthCheckManager:
             overall_status = HealthStatus.HEALTHY
         else:
             overall_status = HealthStatus.UNKNOWN
-
+
         return HealthSummary(
             total_checks=len(results),
             healthy_checks=healthy_count,
             degraded_checks=degraded_count,
             unhealthy_checks=unhealthy_count,
             overall_status=overall_status,
-            details=results
+            details=results,
         )
-
+
     def add_status_change_callback(self, callback: Callable):
         """Add callback for status changes."""
         self.status_change_callbacks.append(callback)
-
-    def get_health_history(
+
+    def get_health_history(
+        self, check_name: str, limit: int = None
+    ) -> List[HealthCheckResult]:
         """Get health check history for a specific check."""
         history = self.history.get(check_name, [])
         if limit:
             return history[-limit:]
         return history
-
+
     async def _check_status_change(self, check_name: str, result: HealthCheckResult):
         """Check if status has changed and notify callbacks."""
         if check_name in self.last_results:
|
|
712
922
|
await callback(check_name, result)
|
713
923
|
except Exception as e:
|
714
924
|
logger.error(f"Error in status change callback: {e}")
|
715
|
-
|
925
|
+
|
716
926
|
async def shutdown(self):
|
717
927
|
"""Shutdown the health check manager."""
|
718
928
|
self._running = False
|
719
929
|
# Any cleanup logic here
|
720
930
|
|
721
931
|
|
932
|
+
# Global health manager instance for convenience functions
|
933
|
+
_global_health_manager: Optional[HealthCheckManager] = None
|
934
|
+
|
935
|
+
|
936
|
+
def get_health_manager() -> HealthCheckManager:
|
937
|
+
"""Get the global health manager instance."""
|
938
|
+
global _global_health_manager
|
939
|
+
if _global_health_manager is None:
|
940
|
+
config = {
|
941
|
+
"enabled": True,
|
942
|
+
"default_interval": 30.0,
|
943
|
+
"parallel_checks": True,
|
944
|
+
"max_concurrent_checks": 10,
|
945
|
+
}
|
946
|
+
_global_health_manager = HealthCheckManager(config)
|
947
|
+
return _global_health_manager
|
948
|
+
|
949
|
+
|
722
950
|
# Add convenience functions for registering health checks
|
723
|
-
async def register_database_health_check(
|
951
|
+
async def register_database_health_check(
|
952
|
+
name: str, database_node, interval: float = 30.0
|
953
|
+
):
|
724
954
|
"""Register a database health check with global manager."""
|
725
955
|
health_check = DatabaseHealthCheck(name, database_node)
|
726
956
|
manager = get_health_manager()
|
727
|
-
|
728
|
-
manager.register_health_check(health_check, interval)
|
729
|
-
else:
|
730
|
-
manager.register_check(name, health_check)
|
957
|
+
manager.register_health_check(health_check, interval)
|
731
958
|
|
732
959
|
|
733
|
-
async def register_memory_health_check(
|
734
|
-
|
960
|
+
async def register_memory_health_check(
|
961
|
+
name: str,
|
962
|
+
warning_threshold: float = 80.0,
|
963
|
+
critical_threshold: float = 95.0,
|
964
|
+
interval: float = 30.0,
|
965
|
+
):
|
735
966
|
"""Register a memory health check with global manager."""
|
736
|
-
|
737
|
-
|
967
|
+
health_check = MemoryHealthCheck(name, warning_threshold, critical_threshold)
|
968
|
+
manager = get_health_manager()
|
969
|
+
manager.register_health_check(health_check, interval)
|
738
970
|
|
739
971
|
|
740
|
-
async def register_custom_health_check(
|
741
|
-
|
972
|
+
async def register_custom_health_check(
|
973
|
+
name: str, check_func: Callable, interval: float = 30.0, timeout: float = 10.0
|
974
|
+
):
|
742
975
|
"""Register a custom health check with global manager."""
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
def get_health_manager():
|
748
|
-
"""Get the global health manager instance."""
|
749
|
-
# Return a default HealthMonitor for now
|
750
|
-
return HealthMonitor()
|
976
|
+
health_check = CustomHealthCheck(name, check_func, timeout=timeout)
|
977
|
+
manager = get_health_manager()
|
978
|
+
manager.register_health_check(health_check, interval)
|