kailash 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +3 -3
- kailash/api/custom_nodes_secure.py +3 -3
- kailash/api/gateway.py +1 -1
- kailash/api/studio.py +2 -3
- kailash/api/workflow_api.py +3 -4
- kailash/core/resilience/bulkhead.py +460 -0
- kailash/core/resilience/circuit_breaker.py +92 -10
- kailash/edge/discovery.py +86 -0
- kailash/mcp_server/__init__.py +334 -0
- kailash/mcp_server/advanced_features.py +1022 -0
- kailash/{mcp → mcp_server}/ai_registry_server.py +29 -4
- kailash/mcp_server/auth.py +789 -0
- kailash/mcp_server/client.py +712 -0
- kailash/mcp_server/discovery.py +1593 -0
- kailash/mcp_server/errors.py +673 -0
- kailash/mcp_server/oauth.py +1727 -0
- kailash/mcp_server/protocol.py +1126 -0
- kailash/mcp_server/registry_integration.py +587 -0
- kailash/mcp_server/server.py +1747 -0
- kailash/{mcp → mcp_server}/servers/ai_registry.py +2 -2
- kailash/mcp_server/transports.py +1169 -0
- kailash/mcp_server/utils/cache.py +510 -0
- kailash/middleware/auth/auth_manager.py +3 -3
- kailash/middleware/communication/api_gateway.py +2 -9
- kailash/middleware/communication/realtime.py +1 -1
- kailash/middleware/mcp/client_integration.py +1 -1
- kailash/middleware/mcp/enhanced_server.py +2 -2
- kailash/nodes/__init__.py +2 -0
- kailash/nodes/admin/audit_log.py +6 -6
- kailash/nodes/admin/permission_check.py +8 -8
- kailash/nodes/admin/role_management.py +32 -28
- kailash/nodes/admin/schema.sql +6 -1
- kailash/nodes/admin/schema_manager.py +13 -13
- kailash/nodes/admin/security_event.py +16 -20
- kailash/nodes/admin/tenant_isolation.py +3 -3
- kailash/nodes/admin/transaction_utils.py +3 -3
- kailash/nodes/admin/user_management.py +21 -22
- kailash/nodes/ai/a2a.py +11 -11
- kailash/nodes/ai/ai_providers.py +9 -12
- kailash/nodes/ai/embedding_generator.py +13 -14
- kailash/nodes/ai/intelligent_agent_orchestrator.py +19 -19
- kailash/nodes/ai/iterative_llm_agent.py +3 -3
- kailash/nodes/ai/llm_agent.py +213 -36
- kailash/nodes/ai/self_organizing.py +2 -2
- kailash/nodes/alerts/discord.py +4 -4
- kailash/nodes/api/graphql.py +6 -6
- kailash/nodes/api/http.py +12 -17
- kailash/nodes/api/rate_limiting.py +4 -4
- kailash/nodes/api/rest.py +15 -15
- kailash/nodes/auth/mfa.py +3 -4
- kailash/nodes/auth/risk_assessment.py +2 -2
- kailash/nodes/auth/session_management.py +5 -5
- kailash/nodes/auth/sso.py +143 -0
- kailash/nodes/base.py +6 -2
- kailash/nodes/base_async.py +16 -2
- kailash/nodes/base_with_acl.py +2 -2
- kailash/nodes/cache/__init__.py +9 -0
- kailash/nodes/cache/cache.py +1172 -0
- kailash/nodes/cache/cache_invalidation.py +870 -0
- kailash/nodes/cache/redis_pool_manager.py +595 -0
- kailash/nodes/code/async_python.py +2 -1
- kailash/nodes/code/python.py +196 -35
- kailash/nodes/compliance/data_retention.py +6 -6
- kailash/nodes/compliance/gdpr.py +5 -5
- kailash/nodes/data/__init__.py +10 -0
- kailash/nodes/data/optimistic_locking.py +906 -0
- kailash/nodes/data/readers.py +8 -8
- kailash/nodes/data/redis.py +349 -0
- kailash/nodes/data/sql.py +314 -3
- kailash/nodes/data/streaming.py +21 -0
- kailash/nodes/enterprise/__init__.py +8 -0
- kailash/nodes/enterprise/audit_logger.py +285 -0
- kailash/nodes/enterprise/batch_processor.py +22 -3
- kailash/nodes/enterprise/data_lineage.py +1 -1
- kailash/nodes/enterprise/mcp_executor.py +205 -0
- kailash/nodes/enterprise/service_discovery.py +150 -0
- kailash/nodes/enterprise/tenant_assignment.py +108 -0
- kailash/nodes/logic/async_operations.py +2 -2
- kailash/nodes/logic/convergence.py +1 -1
- kailash/nodes/logic/operations.py +1 -1
- kailash/nodes/monitoring/__init__.py +11 -1
- kailash/nodes/monitoring/health_check.py +456 -0
- kailash/nodes/monitoring/log_processor.py +817 -0
- kailash/nodes/monitoring/metrics_collector.py +627 -0
- kailash/nodes/monitoring/performance_benchmark.py +137 -11
- kailash/nodes/rag/advanced.py +7 -7
- kailash/nodes/rag/agentic.py +49 -2
- kailash/nodes/rag/conversational.py +3 -3
- kailash/nodes/rag/evaluation.py +3 -3
- kailash/nodes/rag/federated.py +3 -3
- kailash/nodes/rag/graph.py +3 -3
- kailash/nodes/rag/multimodal.py +3 -3
- kailash/nodes/rag/optimized.py +5 -5
- kailash/nodes/rag/privacy.py +3 -3
- kailash/nodes/rag/query_processing.py +6 -6
- kailash/nodes/rag/realtime.py +1 -1
- kailash/nodes/rag/registry.py +2 -6
- kailash/nodes/rag/router.py +1 -1
- kailash/nodes/rag/similarity.py +7 -7
- kailash/nodes/rag/strategies.py +4 -4
- kailash/nodes/security/abac_evaluator.py +6 -6
- kailash/nodes/security/behavior_analysis.py +5 -6
- kailash/nodes/security/credential_manager.py +1 -1
- kailash/nodes/security/rotating_credentials.py +11 -11
- kailash/nodes/security/threat_detection.py +8 -8
- kailash/nodes/testing/credential_testing.py +2 -2
- kailash/nodes/transform/processors.py +5 -5
- kailash/runtime/local.py +162 -14
- kailash/runtime/parameter_injection.py +425 -0
- kailash/runtime/parameter_injector.py +657 -0
- kailash/runtime/testing.py +2 -2
- kailash/testing/fixtures.py +2 -2
- kailash/workflow/builder.py +99 -18
- kailash/workflow/builder_improvements.py +207 -0
- kailash/workflow/input_handling.py +170 -0
- {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/METADATA +21 -8
- {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/RECORD +126 -101
- kailash/mcp/__init__.py +0 -53
- kailash/mcp/client.py +0 -445
- kailash/mcp/server.py +0 -292
- kailash/mcp/server_enhanced.py +0 -449
- kailash/mcp/utils/cache.py +0 -267
- /kailash/{mcp → mcp_server}/client_new.py +0 -0
- /kailash/{mcp → mcp_server}/utils/__init__.py +0 -0
- /kailash/{mcp → mcp_server}/utils/config.py +0 -0
- /kailash/{mcp → mcp_server}/utils/formatters.py +0 -0
- /kailash/{mcp → mcp_server}/utils/metrics.py +0 -0
- {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/WHEEL +0 -0
- {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/entry_points.txt +0 -0
- {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,108 @@
|
|
1
|
+
"""Enterprise tenant assignment node for multi-tenant workflows."""
|
2
|
+
|
3
|
+
import time
|
4
|
+
from typing import Any, Dict
|
5
|
+
|
6
|
+
from kailash.nodes.base import Node, NodeMetadata, NodeParameter, register_node
|
7
|
+
from kailash.sdk_exceptions import NodeExecutionError
|
8
|
+
|
9
|
+
|
10
|
+
@register_node()
class TenantAssignmentNode(Node):
    """Assigns tenant context based on user authentication information.

    This node takes authenticated user information and assigns appropriate
    tenant context including permissions, tier, and compliance settings.
    Tenant selection uses a substring heuristic on the user's email domain;
    unknown domains fall back to a default standard-tier tenant.
    """

    metadata = NodeMetadata(
        name="TenantAssignmentNode",
        description="Assigns tenant context for multi-tenant applications",
        version="1.0.0",
        tags={"enterprise", "tenant", "security"},
    )

    def __init__(self, name: str = None, **kwargs):
        """Initialize the node, defaulting the name to the class name."""
        self.name = name or self.__class__.__name__
        super().__init__(name=self.name, **kwargs)

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Declare the input parameters accepted by this node."""
        return {
            "user_id": NodeParameter(
                name="user_id",
                type=str,
                description="Authenticated user ID",
                required=True,
            ),
            "verified": NodeParameter(
                name="verified",
                type=bool,
                description="Whether user passed MFA verification",
                required=False,
                default=False,
            ),
            "attributes": NodeParameter(
                name="attributes",
                type=dict,
                description="User attributes from SSO",
                required=False,
                # NOTE(review): shared mutable default — assumes NodeParameter
                # does not hand this dict out for mutation; run() additionally
                # guards against a None value.
                default={},
            ),
        }

    def run(
        self, user_id: str, verified: bool = False, attributes: Dict = None, **kwargs
    ) -> Dict[str, Any]:
        """Assign tenant based on user information.

        Args:
            user_id: Authenticated user identifier; treated as an email when
                it contains an ``@``.
            verified: Whether the user passed MFA; grants write permission.
            attributes: Optional SSO attributes; ``email`` is consulted when
                ``user_id`` is not an email.

        Returns:
            Dict with ``tenant``, ``user_context`` and ``assignment_timestamp``.

        Raises:
            NodeExecutionError: If tenant assignment fails for any reason.
        """
        try:
            if attributes is None:
                attributes = {}

            # Extract email from user_id or attributes
            email = (
                user_id
                if "@" in user_id
                else attributes.get("email", "unknown@example.com")
            )

            # Determine tenant based on email domain (substring match, so
            # e.g. "@healthcare-corp.com" also qualifies)
            if "@healthcare" in email:
                tenant = {
                    "id": "healthcare-corp",
                    "tier": "enterprise",
                    "compliance_zones": ["hipaa", "gdpr"],
                    "data_residency": "us-east-1",
                }
            elif "@finance" in email:
                tenant = {
                    "id": "finance-inc",
                    "tier": "premium",
                    "compliance_zones": ["sox", "pci_dss"],
                    "data_residency": "us-east-1",
                }
            else:
                tenant = {
                    "id": "default",
                    "tier": "standard",
                    "compliance_zones": ["public"],
                    "data_residency": "us-west-1",
                }

            # Create user context; write access requires MFA verification
            user_context = {
                "user_id": user_id,
                "tenant_id": tenant["id"],
                "permissions": ["read", "write"] if verified else ["read"],
                "session_id": f"session-{int(time.time())}",
                "compliance_zones": tenant["compliance_zones"],
                "data_residency": tenant["data_residency"],
            }

            return {
                "tenant": tenant,
                "user_context": user_context,
                "assignment_timestamp": time.time(),
            }

        except Exception as e:
            # Chain the original exception so the root cause is preserved
            # in the traceback (the original `raise` swallowed it).
            raise NodeExecutionError(f"Tenant assignment failed: {str(e)}") from e
|
@@ -217,7 +217,7 @@ class AsyncMergeNode(AsyncNode):
|
|
217
217
|
# This will be properly wrapped by the execute() method
|
218
218
|
# which will call it in a sync context
|
219
219
|
raise RuntimeError(
|
220
|
-
"AsyncMergeNode.
|
220
|
+
"AsyncMergeNode.execute() was called directly. Use execute() or execute_async() instead."
|
221
221
|
)
|
222
222
|
|
223
223
|
async def _async_concat(self, data_inputs: list[Any], chunk_size: int) -> Any:
|
@@ -642,7 +642,7 @@ class AsyncSwitchNode(AsyncNode):
|
|
642
642
|
# This will be properly wrapped by the execute() method
|
643
643
|
# which will call it in a sync context
|
644
644
|
raise RuntimeError(
|
645
|
-
"AsyncSwitchNode.
|
645
|
+
"AsyncSwitchNode.execute() was called directly. Use execute() or execute_async() instead."
|
646
646
|
)
|
647
647
|
|
648
648
|
async def _evaluate_condition(
|
@@ -619,7 +619,7 @@ class MultiCriteriaConvergenceNode(CycleAwareNode):
|
|
619
619
|
}
|
620
620
|
|
621
621
|
# Run individual convergence check
|
622
|
-
result = checker.
|
622
|
+
result = checker.execute(context=mock_context, **checker_params)
|
623
623
|
|
624
624
|
results[metric_name] = {
|
625
625
|
"converged": result["converged"],
|
@@ -1,5 +1,15 @@
|
|
1
1
|
"""Monitoring nodes for connection and workflow visualization."""
|
2
2
|
|
3
3
|
from .connection_dashboard import ConnectionDashboardNode
|
4
|
+
from .health_check import HealthCheckNode
|
5
|
+
from .log_processor import LogProcessorNode
|
6
|
+
from .metrics_collector import MetricsCollectorNode
|
7
|
+
from .performance_benchmark import PerformanceBenchmarkNode
|
4
8
|
|
5
|
-
__all__ = [
|
9
|
+
__all__ = [
|
10
|
+
"ConnectionDashboardNode",
|
11
|
+
"HealthCheckNode",
|
12
|
+
"LogProcessorNode",
|
13
|
+
"MetricsCollectorNode",
|
14
|
+
"PerformanceBenchmarkNode",
|
15
|
+
]
|
@@ -0,0 +1,456 @@
|
|
1
|
+
"""Health check node for monitoring service availability.
|
2
|
+
|
3
|
+
This module provides comprehensive health checking capabilities for various
|
4
|
+
services including HTTP endpoints, databases, and custom health checks.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import asyncio
|
8
|
+
import time
|
9
|
+
from datetime import UTC, datetime
|
10
|
+
from enum import Enum
|
11
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
12
|
+
|
13
|
+
import aiohttp
|
14
|
+
import asyncpg
|
15
|
+
|
16
|
+
from kailash.nodes.base import NodeParameter, register_node
|
17
|
+
from kailash.nodes.base_async import AsyncNode
|
18
|
+
from kailash.sdk_exceptions import NodeExecutionError
|
19
|
+
|
20
|
+
|
21
|
+
class HealthStatus(Enum):
|
22
|
+
"""Health check status values."""
|
23
|
+
|
24
|
+
HEALTHY = "healthy"
|
25
|
+
UNHEALTHY = "unhealthy"
|
26
|
+
DEGRADED = "degraded"
|
27
|
+
UNKNOWN = "unknown"
|
28
|
+
|
29
|
+
|
30
|
+
class ServiceType(Enum):
|
31
|
+
"""Types of services that can be health checked."""
|
32
|
+
|
33
|
+
HTTP = "http"
|
34
|
+
DATABASE = "database"
|
35
|
+
REDIS = "redis"
|
36
|
+
CUSTOM = "custom"
|
37
|
+
|
38
|
+
|
39
|
+
@register_node()
class HealthCheckNode(AsyncNode):
    """Node for performing health checks on various services.

    This node provides comprehensive health checking capabilities including:
    - HTTP endpoint health checks with configurable methods and status codes
    - Database connectivity verification with query execution
    - Redis connection and operation verification
    - Custom health check function execution
    - Configurable timeouts and retries
    - Health status aggregation for multiple services
    - Detailed latency measurements

    Design Purpose:
    - Monitor service availability and performance
    - Enable proactive issue detection
    - Support various service types and protocols
    - Provide detailed health metrics for observability

    Examples:
        >>> # HTTP health check
        >>> health_checker = HealthCheckNode()
        >>> result = await health_checker.execute(
        ...     services=[{
        ...         "name": "api",
        ...         "type": "http",
        ...         "url": "https://api.example.com/health",
        ...         "method": "GET",
        ...         "expected_status": [200, 204]
        ...     }]
        ... )

        >>> # Database health check
        >>> result = await health_checker.execute(
        ...     services=[{
        ...         "name": "postgres",
        ...         "type": "database",
        ...         "connection_string": "postgresql://user:pass@localhost/db",
        ...         "test_query": "SELECT 1"
        ...     }]
        ... )
    """

    def __init__(self, **kwargs):
        """Initialize the health check node."""
        super().__init__(**kwargs)
        self.logger.info(f"Initialized HealthCheckNode: {self.id}")

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Define the parameters this node accepts."""
        return {
            "services": NodeParameter(
                name="services",
                type=list,
                required=True,
                description="List of services to health check",
            ),
            "timeout": NodeParameter(
                name="timeout",
                type=float,
                required=False,
                default=30.0,
                description="Timeout in seconds for each health check",
            ),
            "parallel": NodeParameter(
                name="parallel",
                type=bool,
                required=False,
                default=True,
                description="Whether to run health checks in parallel",
            ),
            "fail_fast": NodeParameter(
                name="fail_fast",
                type=bool,
                required=False,
                default=False,
                description="Stop checking on first failure",
            ),
            "retries": NodeParameter(
                name="retries",
                type=int,
                required=False,
                default=3,
                description="Number of retries for failed checks",
            ),
            "retry_delay": NodeParameter(
                name="retry_delay",
                type=float,
                required=False,
                default=1.0,
                description="Delay between retries in seconds",
            ),
        }

    def get_output_schema(self) -> Dict[str, NodeParameter]:
        """Define the output schema for this node."""
        return {
            "overall_status": NodeParameter(
                name="overall_status",
                type=str,
                description="Overall health status (healthy/unhealthy/degraded)",
            ),
            "services": NodeParameter(
                name="services",
                type=dict,
                description="Health status for each service",
            ),
            "healthy_count": NodeParameter(
                name="healthy_count",
                type=int,
                description="Number of healthy services",
            ),
            "unhealthy_count": NodeParameter(
                name="unhealthy_count",
                type=int,
                description="Number of unhealthy services",
            ),
            "total_latency": NodeParameter(
                name="total_latency",
                type=float,
                description="Total time taken for all health checks",
            ),
            "timestamp": NodeParameter(
                name="timestamp",
                type=str,
                description="ISO timestamp of health check execution",
            ),
        }

    async def async_run(self, **kwargs) -> Dict[str, Any]:
        """Execute health checks on configured services.

        Aggregates per-service results into an overall status:
        healthy (no failures), unhealthy (no successes), or degraded (mixed).

        Raises:
            NodeExecutionError: If health check orchestration itself fails.
        """
        services = kwargs["services"]
        timeout = kwargs.get("timeout", 30.0)
        parallel = kwargs.get("parallel", True)
        fail_fast = kwargs.get("fail_fast", False)
        retries = kwargs.get("retries", 3)
        retry_delay = kwargs.get("retry_delay", 1.0)

        start_time = time.time()
        results = {}

        try:
            if parallel:
                # Run health checks in parallel
                tasks = []
                for service in services:
                    task = self._check_service_with_retry(
                        service, timeout, retries, retry_delay
                    )
                    tasks.append(task)

                # Use gather with return_exceptions to handle failures
                check_results = await asyncio.gather(*tasks, return_exceptions=True)

                # Process results
                for service, result in zip(services, check_results):
                    if isinstance(result, Exception):
                        results[service["name"]] = {
                            "status": HealthStatus.UNHEALTHY.value,
                            "error": str(result),
                            "latency": None,
                        }
                    else:
                        results[service["name"]] = result
            else:
                # Run health checks sequentially; fail_fast only applies here,
                # since parallel checks are already in flight together
                for service in services:
                    try:
                        result = await self._check_service_with_retry(
                            service, timeout, retries, retry_delay
                        )
                        results[service["name"]] = result

                        if (
                            fail_fast
                            and result["status"] == HealthStatus.UNHEALTHY.value
                        ):
                            break
                    except Exception as e:
                        results[service["name"]] = {
                            "status": HealthStatus.UNHEALTHY.value,
                            "error": str(e),
                            "latency": None,
                        }
                        if fail_fast:
                            break

            # Calculate overall status (services reporting DEGRADED/UNKNOWN
            # count toward neither tally)
            healthy_count = sum(
                1 for r in results.values() if r["status"] == HealthStatus.HEALTHY.value
            )
            unhealthy_count = sum(
                1
                for r in results.values()
                if r["status"] == HealthStatus.UNHEALTHY.value
            )

            if unhealthy_count == 0:
                overall_status = HealthStatus.HEALTHY.value
            elif healthy_count == 0:
                overall_status = HealthStatus.UNHEALTHY.value
            else:
                overall_status = HealthStatus.DEGRADED.value

            total_latency = time.time() - start_time

            return {
                "overall_status": overall_status,
                "services": results,
                "healthy_count": healthy_count,
                "unhealthy_count": unhealthy_count,
                "total_latency": total_latency,
                "timestamp": datetime.now(UTC).isoformat(),
            }

        except Exception as e:
            self.logger.error(f"Health check failed: {str(e)}")
            # Chain the cause so the original traceback is preserved
            raise NodeExecutionError(f"Health check execution failed: {str(e)}") from e

    async def _check_service_with_retry(
        self, service: Dict[str, Any], timeout: float, retries: int, retry_delay: float
    ) -> Dict[str, Any]:
        """Check a service with retry logic.

        Always performs at least one attempt, even if ``retries`` <= 0
        (the original code would skip the check entirely and report
        ``error: "None"``).
        """
        last_error = None

        # max(1, retries) guarantees at least one attempt
        for attempt in range(max(1, retries)):
            try:
                return await self._check_service(service, timeout)
            except Exception as e:
                last_error = e
                if attempt < retries - 1:
                    await asyncio.sleep(retry_delay)
                    self.logger.debug(
                        f"Retrying health check for {service['name']} "
                        f"(attempt {attempt + 2}/{retries})"
                    )

        # All retries failed
        return {
            "status": HealthStatus.UNHEALTHY.value,
            "error": str(last_error),
            "latency": None,
            "retries": retries,
        }

    async def _check_service(
        self, service: Dict[str, Any], timeout: float
    ) -> Dict[str, Any]:
        """Check a single service health, dispatching on service type.

        Never raises for per-service failures; returns an UNHEALTHY result
        dict instead. Measured latency is attached to successful dispatches.
        """
        service_type = ServiceType(service.get("type", "http"))
        start_time = time.time()

        try:
            if service_type == ServiceType.HTTP:
                result = await self._check_http_service(service, timeout)
            elif service_type == ServiceType.DATABASE:
                result = await self._check_database_service(service, timeout)
            elif service_type == ServiceType.REDIS:
                result = await self._check_redis_service(service, timeout)
            elif service_type == ServiceType.CUSTOM:
                result = await self._check_custom_service(service, timeout)
            else:
                raise ValueError(f"Unsupported service type: {service_type}")

            latency = time.time() - start_time
            result["latency"] = latency
            return result

        except asyncio.TimeoutError:
            return {
                "status": HealthStatus.UNHEALTHY.value,
                "error": f"Health check timed out after {timeout}s",
                "latency": timeout,
            }
        except Exception as e:
            return {
                "status": HealthStatus.UNHEALTHY.value,
                "error": str(e),
                "latency": time.time() - start_time,
            }

    async def _check_http_service(
        self, service: Dict[str, Any], timeout: float
    ) -> Dict[str, Any]:
        """Check HTTP endpoint health.

        Healthy iff the response status is in ``expected_status``
        (int or list of ints, default ``[200]``).
        """
        url = service["url"]
        method = service.get("method", "GET").upper()
        expected_status = service.get("expected_status", [200])
        headers = service.get("headers", {})

        # Normalize a single status code to a list for membership testing
        if isinstance(expected_status, int):
            expected_status = [expected_status]

        async with aiohttp.ClientSession() as session:
            async with session.request(
                method=method,
                url=url,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=timeout),
            ) as response:
                if response.status in expected_status:
                    return {
                        "status": HealthStatus.HEALTHY.value,
                        "status_code": response.status,
                        "response_time": response.headers.get("X-Response-Time"),
                    }
                else:
                    return {
                        "status": HealthStatus.UNHEALTHY.value,
                        "status_code": response.status,
                        "error": f"Unexpected status code: {response.status}",
                    }

    async def _check_database_service(
        self, service: Dict[str, Any], timeout: float
    ) -> Dict[str, Any]:
        """Check database health by connecting and executing a test query.

        NOTE(review): uses asyncpg, so only PostgreSQL connection strings
        are supported despite the generic "database" type — confirm intent.
        """
        connection_string = service["connection_string"]
        test_query = service.get("test_query", "SELECT 1")

        try:
            conn = await asyncio.wait_for(
                asyncpg.connect(connection_string), timeout=timeout
            )

            try:
                # Execute test query
                result = await asyncio.wait_for(
                    conn.fetchval(test_query), timeout=timeout
                )

                return {
                    "status": HealthStatus.HEALTHY.value,
                    "query_result": result,
                }
            finally:
                # Always release the connection, even if the query fails
                await conn.close()

        except Exception as e:
            return {
                "status": HealthStatus.UNHEALTHY.value,
                "error": f"Database check failed: {str(e)}",
            }

    async def _check_redis_service(
        self, service: Dict[str, Any], timeout: float
    ) -> Dict[str, Any]:
        """Check Redis health via PING; degrades gracefully when the
        redis client library is not installed."""
        try:
            # Imported lazily so the node works without redis installed
            import redis.asyncio as redis

            redis_url = service.get("url", "redis://localhost:6379")

            client = redis.from_url(
                redis_url, socket_connect_timeout=timeout, socket_timeout=timeout
            )

            try:
                # Ping Redis
                pong = await client.ping()

                if pong:
                    return {"status": HealthStatus.HEALTHY.value}
                else:
                    return {
                        "status": HealthStatus.UNHEALTHY.value,
                        "error": "Redis ping failed",
                    }
            finally:
                await client.close()

        except ImportError:
            return {
                "status": HealthStatus.UNHEALTHY.value,
                "error": "Redis client not installed (pip install redis)",
            }
        except Exception as e:
            return {
                "status": HealthStatus.UNHEALTHY.value,
                "error": f"Redis check failed: {str(e)}",
            }

    async def _check_custom_service(
        self, service: Dict[str, Any], timeout: float
    ) -> Dict[str, Any]:
        """Check custom service using provided function.

        ``check_function`` may be sync or async; sync functions run in the
        default executor. A dict result with a "status" key is passed through
        verbatim; otherwise truthiness determines healthy/unhealthy.
        """
        check_function = service.get("check_function")

        if not check_function or not callable(check_function):
            return {
                "status": HealthStatus.UNHEALTHY.value,
                "error": "No valid check_function provided",
            }

        try:
            # Run custom check function with timeout
            if asyncio.iscoroutinefunction(check_function):
                result = await asyncio.wait_for(check_function(), timeout=timeout)
            else:
                # Run sync function in executor; get_running_loop() is the
                # correct call inside a coroutine (get_event_loop() is
                # deprecated here since Python 3.10)
                loop = asyncio.get_running_loop()
                result = await asyncio.wait_for(
                    loop.run_in_executor(None, check_function), timeout=timeout
                )

            # Custom function should return dict with status
            if isinstance(result, dict) and "status" in result:
                return result
            elif result:
                return {"status": HealthStatus.HEALTHY.value, "result": result}
            else:
                return {"status": HealthStatus.UNHEALTHY.value, "result": result}

        except Exception as e:
            return {
                "status": HealthStatus.UNHEALTHY.value,
                "error": f"Custom check failed: {str(e)}",
            }
|