kailash 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff compares two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- kailash/__init__.py +1 -1
- kailash/api/custom_nodes_secure.py +2 -2
- kailash/api/studio_secure.py +1 -1
- kailash/mcp/client_new.py +1 -1
- kailash/nodes/ai/a2a.py +1 -1
- kailash/nodes/api/__init__.py +26 -0
- kailash/nodes/api/monitoring.py +463 -0
- kailash/nodes/api/security.py +822 -0
- kailash/nodes/base.py +3 -3
- kailash/nodes/code/python.py +6 -0
- kailash/nodes/data/__init__.py +9 -0
- kailash/nodes/data/directory.py +278 -0
- kailash/nodes/data/event_generation.py +297 -0
- kailash/nodes/data/file_discovery.py +601 -0
- kailash/nodes/data/sql.py +2 -2
- kailash/nodes/transform/processors.py +32 -1
- kailash/runtime/async_local.py +1 -1
- kailash/runtime/docker.py +4 -4
- kailash/runtime/local.py +41 -4
- kailash/runtime/parallel.py +2 -2
- kailash/runtime/parallel_cyclic.py +2 -2
- kailash/runtime/testing.py +2 -2
- kailash/utils/templates.py +6 -6
- kailash/visualization/performance.py +16 -3
- kailash/visualization/reports.py +5 -1
- kailash/workflow/convergence.py +1 -1
- kailash/workflow/cycle_analyzer.py +8 -1
- kailash/workflow/cyclic_runner.py +1 -1
- kailash/workflow/graph.py +33 -6
- kailash/workflow/visualization.py +10 -2
- kailash-0.3.0.dist-info/METADATA +428 -0
- {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/RECORD +36 -31
- kailash-0.2.1.dist-info/METADATA +0 -1617
- {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/WHEEL +0 -0
- {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.2.1.dist-info → kailash-0.3.0.dist-info}/top_level.txt +0 -0
kailash/__init__.py
CHANGED
kailash/api/custom_nodes_secure.py
CHANGED
@@ -350,7 +350,7 @@ async def _execute_api_node(
 ) -> Dict[str, Any]:
     """Execute an API-based custom node"""
 
-    from kailash.nodes.api.http import HTTPClientNode
+    from kailash.nodes.api.http import HTTPRequestNode
     from kailash.security import TenantContext
 
     # Execute in tenant context
@@ -359,7 +359,7 @@ async def _execute_api_node(
     api_config = node.implementation.get("api", {})
 
     # Create HTTP client node
-    http_node = HTTPClientNode(
+    http_node = HTTPRequestNode(
         url=api_config.get("url", ""),
         method=api_config.get("method", "GET"),
         headers=api_config.get("headers", {}),
kailash/api/studio_secure.py
CHANGED
kailash/mcp/client_new.py
CHANGED
@@ -292,7 +292,7 @@ class MCPClient:
 
 # Convenience functions for LLM agents
 async def discover_and_prepare_tools(
-    mcp_servers: List[Union[str, Dict[str, Any]]]
+    mcp_servers: List[Union[str, Dict[str, Any]]],
 ) -> List[Dict[str, Any]]:
     """
     Discover tools from multiple MCP servers and prepare them for LLM use.
kailash/nodes/ai/a2a.py
CHANGED
@@ -832,7 +832,7 @@ Focus on actionable intelligence rather than just listing what each agent said."
             summary = result.get("response", {}).get("content", "")
             if summary:
                 return f"Shared Context Summary:\n{summary}"
-        except:
+        except Exception:
             pass
 
         # Fallback to simple summary
kailash/nodes/api/__init__.py
CHANGED
@@ -18,9 +18,12 @@ Design philosophy:
 - Enable both synchronous and asynchronous operation
 """
 
+import warnings
+
 from .auth import APIKeyNode, BasicAuthNode, OAuth2Node
 from .graphql import AsyncGraphQLClientNode, GraphQLClientNode
 from .http import AsyncHTTPRequestNode, HTTPRequestNode
+from .monitoring import HealthCheckNode
 from .rate_limiting import (
     AsyncRateLimitedAPINode,
     RateLimitConfig,
@@ -31,6 +34,24 @@ from .rate_limiting import (
     create_rate_limiter,
 )
 from .rest import AsyncRESTClientNode, RESTClientNode
+from .security import SecurityScannerNode
+
+# Backwards compatibility aliases
+HTTPClientNode = HTTPRequestNode  # Deprecated: Use HTTPRequestNode instead
+
+
+def __getattr__(name):
+    """Provide deprecation warnings for backwards compatibility."""
+    if name == "HTTPClientNode":
+        warnings.warn(
+            "HTTPClientNode is deprecated and will be removed in v0.3.0. "
+            "Use HTTPRequestNode instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return HTTPRequestNode
+    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
+
 
 __all__ = [
     "HTTPRequestNode",
@@ -49,4 +70,9 @@ __all__ = [
     "RateLimitedAPINode",
     "AsyncRateLimitedAPINode",
     "create_rate_limiter",
+    # Monitoring and Security
+    "HealthCheckNode",
+    "SecurityScannerNode",
+    # Backwards compatibility
+    "HTTPClientNode",  # Deprecated alias
 ]
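One caveat on the shim above: module-level `__getattr__` (PEP 562) only runs when normal attribute lookup fails, and this module also assigns `HTTPClientNode` directly, so imports of the alias resolve without emitting the warning. A minimal sketch of the lazy-alias pattern as it is usually written, with hypothetical names (`mylib`, `OldNode`, `NewNode`):

# mylib/__init__.py -- PEP 562 deprecation shim (hypothetical module).
import warnings

class NewNode:
    """The replacement class."""

# Deliberately no "OldNode = NewNode" at module level: the name must be
# absent so that attribute lookup falls through to __getattr__.
def __getattr__(name):
    if name == "OldNode":
        warnings.warn(
            "OldNode is deprecated; use NewNode instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return NewNode
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")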
kailash/nodes/api/monitoring.py
ADDED
@@ -0,0 +1,463 @@
+"""Monitoring and health check nodes for system observability."""
+
+import asyncio
+import socket
+import subprocess
+import time
+from datetime import datetime, timezone
+from typing import Any, Dict, List
+
+import requests
+
+from kailash.nodes.base import Node, NodeParameter, register_node
+
+
+@register_node()
+class HealthCheckNode(Node):
+    """
+    Performs health checks on various system components and services.
+
+    This node provides comprehensive health monitoring capabilities for
+    distributed systems, replacing DataTransformer with embedded Python code
+    for monitoring tasks. It supports HTTP endpoints, TCP ports, databases,
+    file systems, and custom health check commands.
+
+    Design Philosophy:
+        Modern distributed systems require robust health monitoring. This node
+        provides a declarative way to define health checks without writing
+        custom code in DataTransformer nodes. It standardizes health check
+        patterns and provides consistent output formats.
+
+    Upstream Dependencies:
+        - Configuration nodes with endpoint definitions
+        - Service discovery nodes
+        - Timer nodes for scheduled checks
+        - Alert threshold nodes
+
+    Downstream Consumers:
+        - Alert generation nodes
+        - Dashboard visualization nodes
+        - Logging and metrics nodes
+        - Auto-scaling decision nodes
+        - Incident response workflows
+
+    Configuration:
+        - Target endpoints and services
+        - Check types and parameters
+        - Timeout and retry settings
+        - Success/failure criteria
+        - Alert thresholds
+
+    Implementation Details:
+        - Parallel execution of multiple checks
+        - Proper timeout handling
+        - Retry logic with exponential backoff
+        - Structured output with metrics
+        - Support for various check types
+
+    Error Handling:
+        - Graceful handling of network failures
+        - Timeout management
+        - Invalid configuration detection
+        - Partial failure reporting
+
+    Side Effects:
+        - Network requests to target systems
+        - File system access for disk checks
+        - Process execution for custom commands
+        - Minimal impact design
+
+    Examples:
+        >>> # HTTP endpoint health checks
+        >>> health_check = HealthCheckNode(
+        ...     targets=[
+        ...         {'type': 'http', 'url': 'https://api.example.com/health'},
+        ...         {'type': 'http', 'url': 'https://app.example.com/status'}
+        ...     ],
+        ...     timeout=30
+        ... )
+        >>> result = health_check.execute()
+        >>> assert 'health_results' in result
+        >>> assert result['summary']['total_checks'] == 2
+        >>>
+        >>> # Mixed health checks
+        >>> health_check = HealthCheckNode(
+        ...     targets=[
+        ...         {'type': 'tcp', 'host': 'database.example.com', 'port': 5432},
+        ...         {'type': 'disk', 'path': '/var/log', 'threshold': 80},
+        ...         {'type': 'command', 'command': 'systemctl is-active nginx'}
+        ...     ]
+        ... )
+        >>> result = health_check.execute()
+        >>> assert 'health_results' in result
+    """
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        return {
+            "targets": NodeParameter(
+                name="targets",
+                type=list,
+                required=True,
+                description="List of health check targets with type and configuration",
+            ),
+            "timeout": NodeParameter(
+                name="timeout",
+                type=int,
+                required=False,
+                default=30,
+                description="Timeout in seconds for each health check",
+            ),
+            "retries": NodeParameter(
+                name="retries",
+                type=int,
+                required=False,
+                default=2,
+                description="Number of retry attempts for failed checks",
+            ),
+            "parallel": NodeParameter(
+                name="parallel",
+                type=bool,
+                required=False,
+                default=True,
+                description="Execute health checks in parallel",
+            ),
+            "include_metrics": NodeParameter(
+                name="include_metrics",
+                type=bool,
+                required=False,
+                default=True,
+                description="Include performance metrics in results",
+            ),
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        targets = kwargs["targets"]
+        timeout = kwargs.get("timeout", 30)
+        retries = kwargs.get("retries", 2)
+        parallel = kwargs.get("parallel", True)
+        include_metrics = kwargs.get("include_metrics", True)
+
+        start_time = time.time()
+
+        if parallel:
+            # Use asyncio for parallel execution
+            results = asyncio.run(
+                self._run_checks_parallel(targets, timeout, retries, include_metrics)
+            )
+        else:
+            # Sequential execution
+            results = self._run_checks_sequential(
+                targets, timeout, retries, include_metrics
+            )
+
+        execution_time = time.time() - start_time
+
+        # Generate summary
+        summary = self._generate_summary(results, execution_time)
+
+        return {
+            "health_results": results,
+            "summary": summary,
+            "check_count": len(results),
+            "healthy_count": len([r for r in results if r["status"] == "healthy"]),
+            "unhealthy_count": len([r for r in results if r["status"] == "unhealthy"]),
+            "execution_time": execution_time,
+            "timestamp": datetime.now(timezone.utc).isoformat() + "Z",
+        }
+
+    async def _run_checks_parallel(
+        self, targets: List[Dict], timeout: int, retries: int, include_metrics: bool
+    ) -> List[Dict[str, Any]]:
+        """Run health checks in parallel using asyncio."""
+
+        async def run_single_check(target):
+            return await asyncio.get_event_loop().run_in_executor(
+                None,
+                self._perform_health_check,
+                target,
+                timeout,
+                retries,
+                include_metrics,
+            )
+
+        tasks = [run_single_check(target) for target in targets]
+        return await asyncio.gather(*tasks, return_exceptions=True)
+
+    def _run_checks_sequential(
+        self, targets: List[Dict], timeout: int, retries: int, include_metrics: bool
+    ) -> List[Dict[str, Any]]:
+        """Run health checks sequentially."""
+        return [
+            self._perform_health_check(target, timeout, retries, include_metrics)
+            for target in targets
+        ]
+
+    def _perform_health_check(
+        self, target: Dict, timeout: int, retries: int, include_metrics: bool
+    ) -> Dict[str, Any]:
+        """Perform a single health check with retry logic."""
+
+        check_type = target.get("type", "unknown")
+        check_id = target.get("id", f"{check_type}_{hash(str(target)) % 10000}")
+
+        for attempt in range(retries + 1):
+            try:
+                start_time = time.time()
+
+                if check_type == "http":
+                    result = self._check_http(target, timeout)
+                elif check_type == "tcp":
+                    result = self._check_tcp(target, timeout)
+                elif check_type == "disk":
+                    result = self._check_disk(target)
+                elif check_type == "command":
+                    result = self._check_command(target, timeout)
+                elif check_type == "database":
+                    result = self._check_database(target, timeout)
+                else:
+                    result = {
+                        "status": "unhealthy",
+                        "message": f"Unknown check type: {check_type}",
+                        "details": {},
+                    }
+
+                # Add timing information
+                response_time = time.time() - start_time
+                result["response_time"] = response_time
+                result["attempt"] = attempt + 1
+                result["check_id"] = check_id
+                result["check_type"] = check_type
+                result["target"] = target
+                result["timestamp"] = datetime.now(timezone.utc).isoformat() + "Z"
+
+                # If successful, return immediately
+                if result["status"] == "healthy":
+                    return result
+
+            except Exception as e:
+                if attempt == retries:  # Last attempt
+                    return {
+                        "check_id": check_id,
+                        "check_type": check_type,
+                        "target": target,
+                        "status": "unhealthy",
+                        "message": f"Health check failed after {retries + 1} attempts: {str(e)}",
+                        "details": {"error": str(e), "error_type": type(e).__name__},
+                        "response_time": time.time() - start_time,
+                        "attempt": attempt + 1,
+                        "timestamp": datetime.now(timezone.utc).isoformat() + "Z",
+                    }
+
+            # Wait before retry (exponential backoff)
+            time.sleep(min(2**attempt, 10))
+
+        return result
+
+    def _check_http(self, target: Dict, timeout: int) -> Dict[str, Any]:
+        """Perform HTTP health check."""
+        url = target["url"]
+        expected_status = target.get("expected_status", 200)
+        expected_content = target.get("expected_content")
+        headers = target.get("headers", {})
+
+        response = requests.get(url, timeout=timeout, headers=headers)
+
+        # Check status code
+        if response.status_code != expected_status:
+            return {
+                "status": "unhealthy",
+                "message": f"HTTP status {response.status_code}, expected {expected_status}",
+                "details": {
+                    "status_code": response.status_code,
+                    "response_size": len(response.content),
+                    "url": url,
+                },
+            }
+
+        # Check content if specified
+        if expected_content and expected_content not in response.text:
+            return {
+                "status": "unhealthy",
+                "message": f"Expected content '{expected_content}' not found in response",
+                "details": {
+                    "status_code": response.status_code,
+                    "response_size": len(response.content),
+                    "url": url,
+                },
+            }
+
+        return {
+            "status": "healthy",
+            "message": f"HTTP check successful: {response.status_code}",
+            "details": {
+                "status_code": response.status_code,
+                "response_size": len(response.content),
+                "url": url,
+            },
+        }
+
+    def _check_tcp(self, target: Dict, timeout: int) -> Dict[str, Any]:
+        """Perform TCP port connectivity check."""
+        host = target["host"]
+        port = target["port"]
+
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sock.settimeout(timeout)
+
+        try:
+            result = sock.connect_ex((host, port))
+            if result == 0:
+                return {
+                    "status": "healthy",
+                    "message": f"TCP connection successful to {host}:{port}",
+                    "details": {"host": host, "port": port},
+                }
+            else:
+                return {
+                    "status": "unhealthy",
+                    "message": f"TCP connection failed to {host}:{port}",
+                    "details": {"host": host, "port": port, "error_code": result},
+                }
+        finally:
+            sock.close()
+
+    def _check_disk(self, target: Dict) -> Dict[str, Any]:
+        """Perform disk space check."""
+        import shutil
+
+        path = target["path"]
+        threshold = target.get("threshold", 90)  # Default 90% threshold
+
+        try:
+            total, used, free = shutil.disk_usage(path)
+            usage_percent = (used / total) * 100
+
+            if usage_percent > threshold:
+                return {
+                    "status": "unhealthy",
+                    "message": f"Disk usage {usage_percent:.1f}% exceeds threshold {threshold}%",
+                    "details": {
+                        "path": path,
+                        "usage_percent": usage_percent,
+                        "threshold": threshold,
+                        "total_gb": total / (1024**3),
+                        "used_gb": used / (1024**3),
+                        "free_gb": free / (1024**3),
+                    },
+                }
+            else:
+                return {
+                    "status": "healthy",
+                    "message": f"Disk usage {usage_percent:.1f}% within threshold",
+                    "details": {
+                        "path": path,
+                        "usage_percent": usage_percent,
+                        "threshold": threshold,
+                        "total_gb": total / (1024**3),
+                        "used_gb": used / (1024**3),
+                        "free_gb": free / (1024**3),
+                    },
+                }
+        except Exception as e:
+            return {
+                "status": "unhealthy",
+                "message": f"Disk check failed: {str(e)}",
+                "details": {"path": path, "error": str(e)},
+            }
+
+    def _check_command(self, target: Dict, timeout: int) -> Dict[str, Any]:
+        """Perform custom command health check."""
+        command = target["command"]
+        expected_exit_code = target.get("expected_exit_code", 0)
+
+        try:
+            result = subprocess.run(
+                command,
+                shell=True,
+                timeout=timeout,
+                capture_output=True,
+                text=True,
+            )
+
+            if result.returncode == expected_exit_code:
+                return {
+                    "status": "healthy",
+                    "message": f"Command succeeded with exit code {result.returncode}",
+                    "details": {
+                        "command": command,
+                        "exit_code": result.returncode,
+                        "stdout": result.stdout.strip(),
+                        "stderr": result.stderr.strip(),
+                    },
+                }
+            else:
+                return {
+                    "status": "unhealthy",
+                    "message": f"Command failed with exit code {result.returncode}",
+                    "details": {
+                        "command": command,
+                        "exit_code": result.returncode,
+                        "expected_exit_code": expected_exit_code,
+                        "stdout": result.stdout.strip(),
+                        "stderr": result.stderr.strip(),
+                    },
+                }
+        except subprocess.TimeoutExpired:
+            return {
+                "status": "unhealthy",
+                "message": f"Command timed out after {timeout} seconds",
+                "details": {"command": command, "timeout": timeout},
+            }
+
+    def _check_database(self, target: Dict, timeout: int) -> Dict[str, Any]:
+        """Perform database connectivity check."""
+        # This is a simplified example - in production, you'd use actual database drivers
+        db_type = target.get("db_type", "postgresql")
+        host = target["host"]
+        port = target.get("port", 5432 if db_type == "postgresql" else 3306)
+
+        # For now, just check TCP connectivity
+        # In a real implementation, you'd use database-specific health checks
+        return self._check_tcp({"host": host, "port": port}, timeout)
+
+    def _generate_summary(
+        self, results: List[Dict], execution_time: float
+    ) -> Dict[str, Any]:
+        """Generate summary statistics from health check results."""
+        total_checks = len(results)
+        healthy_checks = len([r for r in results if r.get("status") == "healthy"])
+        unhealthy_checks = total_checks - healthy_checks
+
+        # Calculate average response time
+        response_times = [
+            r.get("response_time", 0) for r in results if "response_time" in r
+        ]
+        avg_response_time = (
+            sum(response_times) / len(response_times) if response_times else 0
+        )
+
+        # Group by check type
+        check_types = {}
+        for result in results:
+            check_type = result.get("check_type", "unknown")
+            if check_type not in check_types:
+                check_types[check_type] = {"total": 0, "healthy": 0, "unhealthy": 0}
+
+            check_types[check_type]["total"] += 1
+            if result.get("status") == "healthy":
+                check_types[check_type]["healthy"] += 1
+            else:
+                check_types[check_type]["unhealthy"] += 1
+
+        return {
+            "total_checks": total_checks,
+            "healthy_checks": healthy_checks,
+            "unhealthy_checks": unhealthy_checks,
+            "health_percentage": (
+                (healthy_checks / total_checks * 100) if total_checks > 0 else 0
+            ),
+            "average_response_time": avg_response_time,
+            "execution_time": execution_time,
+            "check_types": check_types,
+            "overall_status": "healthy" if unhealthy_checks == 0 else "unhealthy",
+        }