kailash 0.6.3__py3-none-any.whl → 0.6.5__py3-none-any.whl
This diff represents the changes between publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
- kailash/__init__.py +3 -3
- kailash/api/custom_nodes_secure.py +3 -3
- kailash/api/gateway.py +1 -1
- kailash/api/studio.py +1 -1
- kailash/api/workflow_api.py +2 -2
- kailash/core/resilience/bulkhead.py +475 -0
- kailash/core/resilience/circuit_breaker.py +92 -10
- kailash/core/resilience/health_monitor.py +578 -0
- kailash/edge/discovery.py +86 -0
- kailash/mcp_server/__init__.py +309 -33
- kailash/mcp_server/advanced_features.py +1022 -0
- kailash/mcp_server/ai_registry_server.py +27 -2
- kailash/mcp_server/auth.py +789 -0
- kailash/mcp_server/client.py +645 -378
- kailash/mcp_server/discovery.py +1593 -0
- kailash/mcp_server/errors.py +673 -0
- kailash/mcp_server/oauth.py +1727 -0
- kailash/mcp_server/protocol.py +1126 -0
- kailash/mcp_server/registry_integration.py +587 -0
- kailash/mcp_server/server.py +1228 -96
- kailash/mcp_server/transports.py +1169 -0
- kailash/mcp_server/utils/__init__.py +6 -1
- kailash/mcp_server/utils/cache.py +250 -7
- kailash/middleware/auth/auth_manager.py +3 -3
- kailash/middleware/communication/api_gateway.py +1 -1
- kailash/middleware/communication/realtime.py +1 -1
- kailash/middleware/mcp/enhanced_server.py +1 -1
- kailash/nodes/__init__.py +2 -0
- kailash/nodes/admin/audit_log.py +6 -6
- kailash/nodes/admin/permission_check.py +8 -8
- kailash/nodes/admin/role_management.py +32 -28
- kailash/nodes/admin/schema.sql +6 -1
- kailash/nodes/admin/schema_manager.py +13 -13
- kailash/nodes/admin/security_event.py +15 -15
- kailash/nodes/admin/tenant_isolation.py +3 -3
- kailash/nodes/admin/transaction_utils.py +3 -3
- kailash/nodes/admin/user_management.py +21 -21
- kailash/nodes/ai/a2a.py +11 -11
- kailash/nodes/ai/ai_providers.py +9 -12
- kailash/nodes/ai/embedding_generator.py +13 -14
- kailash/nodes/ai/intelligent_agent_orchestrator.py +19 -19
- kailash/nodes/ai/iterative_llm_agent.py +2 -2
- kailash/nodes/ai/llm_agent.py +210 -33
- kailash/nodes/ai/self_organizing.py +2 -2
- kailash/nodes/alerts/discord.py +4 -4
- kailash/nodes/api/graphql.py +6 -6
- kailash/nodes/api/http.py +10 -10
- kailash/nodes/api/rate_limiting.py +4 -4
- kailash/nodes/api/rest.py +15 -15
- kailash/nodes/auth/mfa.py +3 -3
- kailash/nodes/auth/risk_assessment.py +2 -2
- kailash/nodes/auth/session_management.py +5 -5
- kailash/nodes/auth/sso.py +143 -0
- kailash/nodes/base.py +8 -2
- kailash/nodes/base_async.py +16 -2
- kailash/nodes/base_with_acl.py +2 -2
- kailash/nodes/cache/__init__.py +9 -0
- kailash/nodes/cache/cache.py +1172 -0
- kailash/nodes/cache/cache_invalidation.py +874 -0
- kailash/nodes/cache/redis_pool_manager.py +595 -0
- kailash/nodes/code/async_python.py +2 -1
- kailash/nodes/code/python.py +194 -30
- kailash/nodes/compliance/data_retention.py +6 -6
- kailash/nodes/compliance/gdpr.py +5 -5
- kailash/nodes/data/__init__.py +10 -0
- kailash/nodes/data/async_sql.py +1956 -129
- kailash/nodes/data/optimistic_locking.py +906 -0
- kailash/nodes/data/readers.py +8 -8
- kailash/nodes/data/redis.py +378 -0
- kailash/nodes/data/sql.py +314 -3
- kailash/nodes/data/streaming.py +21 -0
- kailash/nodes/enterprise/__init__.py +8 -0
- kailash/nodes/enterprise/audit_logger.py +285 -0
- kailash/nodes/enterprise/batch_processor.py +22 -3
- kailash/nodes/enterprise/data_lineage.py +1 -1
- kailash/nodes/enterprise/mcp_executor.py +205 -0
- kailash/nodes/enterprise/service_discovery.py +150 -0
- kailash/nodes/enterprise/tenant_assignment.py +108 -0
- kailash/nodes/logic/async_operations.py +2 -2
- kailash/nodes/logic/convergence.py +1 -1
- kailash/nodes/logic/operations.py +1 -1
- kailash/nodes/monitoring/__init__.py +11 -1
- kailash/nodes/monitoring/health_check.py +456 -0
- kailash/nodes/monitoring/log_processor.py +817 -0
- kailash/nodes/monitoring/metrics_collector.py +627 -0
- kailash/nodes/monitoring/performance_benchmark.py +137 -11
- kailash/nodes/rag/advanced.py +7 -7
- kailash/nodes/rag/agentic.py +49 -2
- kailash/nodes/rag/conversational.py +3 -3
- kailash/nodes/rag/evaluation.py +3 -3
- kailash/nodes/rag/federated.py +3 -3
- kailash/nodes/rag/graph.py +3 -3
- kailash/nodes/rag/multimodal.py +3 -3
- kailash/nodes/rag/optimized.py +5 -5
- kailash/nodes/rag/privacy.py +3 -3
- kailash/nodes/rag/query_processing.py +6 -6
- kailash/nodes/rag/realtime.py +1 -1
- kailash/nodes/rag/registry.py +1 -1
- kailash/nodes/rag/router.py +1 -1
- kailash/nodes/rag/similarity.py +7 -7
- kailash/nodes/rag/strategies.py +4 -4
- kailash/nodes/security/abac_evaluator.py +6 -6
- kailash/nodes/security/behavior_analysis.py +5 -5
- kailash/nodes/security/credential_manager.py +1 -1
- kailash/nodes/security/rotating_credentials.py +11 -11
- kailash/nodes/security/threat_detection.py +8 -8
- kailash/nodes/testing/credential_testing.py +2 -2
- kailash/nodes/transform/processors.py +5 -5
- kailash/runtime/local.py +163 -9
- kailash/runtime/parameter_injection.py +425 -0
- kailash/runtime/parameter_injector.py +657 -0
- kailash/runtime/testing.py +2 -2
- kailash/testing/fixtures.py +2 -2
- kailash/workflow/builder.py +99 -14
- kailash/workflow/builder_improvements.py +207 -0
- kailash/workflow/input_handling.py +170 -0
- {kailash-0.6.3.dist-info → kailash-0.6.5.dist-info}/METADATA +22 -9
- {kailash-0.6.3.dist-info → kailash-0.6.5.dist-info}/RECORD +122 -95
- {kailash-0.6.3.dist-info → kailash-0.6.5.dist-info}/WHEEL +0 -0
- {kailash-0.6.3.dist-info → kailash-0.6.5.dist-info}/entry_points.txt +0 -0
- {kailash-0.6.3.dist-info → kailash-0.6.5.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.6.3.dist-info → kailash-0.6.5.dist-info}/top_level.txt +0 -0
kailash/__init__.py
CHANGED
```diff
@@ -3,8 +3,8 @@
 The Kailash SDK provides a comprehensive framework for creating nodes and workflows
 that align with container-node architecture while allowing rapid prototyping.
 
-New in v0.
-
+New in v0.6.5: Enterprise AsyncSQL enhancements with optimistic locking, comprehensive
+testing improvements, and production-grade documentation.
 """
 
 from kailash.nodes.base import Node, NodeMetadata, NodeParameter
```
```diff
@@ -33,7 +33,7 @@ except ImportError:
 # For backward compatibility
 WorkflowGraph = Workflow
 
-__version__ = "0.6.
+__version__ = "0.6.4"
 
 __all__ = [
     # Core workflow components
```
kailash/api/custom_nodes_secure.py
CHANGED
```diff
@@ -317,7 +317,7 @@ async def _execute_python_node(
     )
 
     # Run the node
-    result = python_node.
+    result = python_node.execute(**test_data)
 
     return result
 
@@ -369,9 +369,9 @@ async def _execute_api_node(
     # Prepare request data
     if api_config.get("method") in ["POST", "PUT", "PATCH"]:
         # Include test data in body
-        result = await http_node.
+        result = await http_node.execute(json_data=test_data)
     else:
         # Include test data as query params
-        result = await http_node.
+        result = await http_node.execute(params=test_data)
 
     return result
```
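Both hunks converge on the SDK's unified `execute()` entry point, with the sandboxed test inputs passed as keyword arguments. A short sketch of the new call pattern; the `PythonCodeNode` constructor arguments and output shape below are illustrative assumptions, and only the `execute(**test_data)` call itself comes from the diff:

```python
# Sketch of the execute() pattern from the hunks above. The constructor
# arguments are assumptions for illustration; only execute(**test_data)
# is taken from the diff itself.
from kailash.nodes.code import PythonCodeNode

node = PythonCodeNode(
    name="double",  # assumed constructor signature
    code="result = {'value': value * 2}",  # assumed: output bound to `result`
)

test_data = {"value": 21}
output = node.execute(**test_data)  # keyword-expanded inputs, as in the diff
print(output)
```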
kailash/api/gateway.py
CHANGED
```diff
@@ -29,7 +29,7 @@ Example:
     >>> gateway.register_workflow("analytics", analytics_workflow)
 
     >>> # Start server
-    >>> gateway.
+    >>> gateway.execute(port=8000)  # doctest: +SKIP
 
     >>> # With MCP integration
     >>> from kailash.api.mcp_integration import MCPIntegration
```
kailash/api/studio.py
CHANGED
kailash/api/workflow_api.py
CHANGED
```diff
@@ -58,7 +58,7 @@ class WorkflowAPI:
     >>> # For any workflow
     >>> from my_workflows import rag_workflow
     >>> api = WorkflowAPI(rag_workflow)
-    >>> api.
+    >>> api.execute(port=8000)
     """
 
     def __init__(
@@ -388,7 +388,7 @@ def create_workflow_api(
 
     Example:
         >>> api = create_workflow_api(my_workflow, api_type="rag")
-        >>> api.
+        >>> api.execute(port=8000)
     """
     api_classes = {
         "generic": WorkflowAPI,
```
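Both docstrings now start the server through the same `execute(port=...)` entry point. A minimal end-to-end sketch; the `WorkflowBuilder` usage is an assumption for illustration, while `WorkflowAPI(...)`, `create_workflow_api(..., api_type="rag")`, and `execute(port=8000)` mirror the docstrings in this diff:

```python
# Sketch: serve a workflow over HTTP via the renamed execute() entry point.
# The WorkflowBuilder calls are assumptions for illustration; the WorkflowAPI
# calls mirror the updated docstrings above.
from kailash.api.workflow_api import WorkflowAPI, create_workflow_api
from kailash.workflow.builder import WorkflowBuilder  # assumed builder API

builder = WorkflowBuilder()
builder.add_node("PythonCodeNode", "hello", {"code": "result = {'msg': 'hi'}"})
workflow = builder.build()

api = WorkflowAPI(workflow)  # generic wrapper, as in the class docstring
# api = create_workflow_api(workflow, api_type="rag")  # specialized variant
api.execute(port=8000)  # blocks and serves the workflow
```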
kailash/core/resilience/bulkhead.py
ADDED
@@ -0,0 +1,475 @@
```python
"""Bulkhead isolation pattern implementation for operation segregation.

This module implements the Bulkhead pattern to isolate different types of
operations with separate resource pools, preventing resource exhaustion
in one area from affecting other operations.

The bulkhead provides:
- Resource pool isolation by operation type
- Thread pool management for CPU-bound tasks
- Connection pool management for I/O operations
- Priority-based resource allocation
- Real-time monitoring and metrics

Example:
    >>> bulkhead = BulkheadManager()
    >>>
    >>> # Execute with isolation
    >>> async with bulkhead.get_partition("critical_operations") as partition:
    ...     result = await partition.execute(critical_task)
"""

import asyncio
import logging
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager
from dataclasses import dataclass, field
from datetime import UTC, datetime
from enum import Enum
from typing import Any, Callable, Dict, List, Optional, Set, Union
from uuid import uuid4

logger = logging.getLogger(__name__)


class PartitionType(Enum):
    """Types of bulkhead partitions."""

    CPU_BOUND = "cpu_bound"  # For CPU-intensive operations
    IO_BOUND = "io_bound"  # For I/O operations
    CRITICAL = "critical"  # For critical high-priority operations
    BACKGROUND = "background"  # For background/batch operations
    CUSTOM = "custom"  # Custom partition types


class ResourceType(Enum):
    """Types of resources managed by bulkhead."""

    THREADS = "threads"
    CONNECTIONS = "connections"
    MEMORY = "memory"
    SEMAPHORE = "semaphore"


@dataclass
class PartitionConfig:
    """Configuration for a bulkhead partition."""

    name: str
    partition_type: PartitionType
    max_concurrent_operations: int = 10
    max_threads: Optional[int] = None  # For CPU-bound partitions
    max_connections: Optional[int] = None  # For I/O partitions
    timeout: int = 30  # Operation timeout in seconds
    priority: int = 1  # Higher number = higher priority
    queue_size: int = 100  # Max queued operations
    isolation_level: str = "strict"  # strict, relaxed, shared
    circuit_breaker_enabled: bool = True
    metrics_enabled: bool = True
    resource_limits: Dict[ResourceType, int] = field(default_factory=dict)


@dataclass
class PartitionMetrics:
    """Metrics for a bulkhead partition."""

    total_operations: int = 0
    successful_operations: int = 0
    failed_operations: int = 0
    rejected_operations: int = 0
    queued_operations: int = 0
    active_operations: int = 0
    avg_execution_time: float = 0.0
    max_execution_time: float = 0.0
    resource_utilization: Dict[ResourceType, float] = field(default_factory=dict)
    last_activity: Optional[datetime] = None
    created_at: datetime = field(default_factory=lambda: datetime.now(UTC))


class BulkheadPartition:
    """Individual partition within the bulkhead for operation isolation."""

    def __init__(self, config: PartitionConfig):
        """Initialize bulkhead partition."""
        self.config = config
        self.metrics = PartitionMetrics()
        self._lock = asyncio.Lock()

        # Resource management
        self._semaphore = asyncio.Semaphore(config.max_concurrent_operations)
        self._thread_pool: Optional[ThreadPoolExecutor] = None
        self._active_operations: Set[str] = set()
        self._operation_queue: asyncio.Queue = asyncio.Queue(maxsize=config.queue_size)

        # Initialize thread pool for CPU-bound operations
        if config.partition_type == PartitionType.CPU_BOUND and config.max_threads:
            self._thread_pool = ThreadPoolExecutor(
                max_workers=config.max_threads,
                thread_name_prefix=f"bulkhead-{config.name}",
            )

        # Circuit breaker integration
        self._circuit_breaker = None
        if config.circuit_breaker_enabled:
            from kailash.core.resilience.circuit_breaker import (
                CircuitBreakerConfig,
                ConnectionCircuitBreaker,
            )

            breaker_config = CircuitBreakerConfig(
                failure_threshold=5, recovery_timeout=30
            )
            self._circuit_breaker = ConnectionCircuitBreaker(breaker_config)

        logger.info(f"Initialized bulkhead partition: {config.name}")

    async def execute(
        self,
        func: Callable,
        *args,
        priority: Optional[int] = None,
        timeout: Optional[int] = None,
        **kwargs,
    ) -> Any:
        """Execute operation within partition isolation.

        Args:
            func: Function to execute
            *args: Function arguments
            priority: Operation priority (overrides partition default)
            timeout: Operation timeout (overrides partition default)
            **kwargs: Function keyword arguments

        Returns:
            Function result

        Raises:
            BulkheadRejectionError: If partition is overloaded
            asyncio.TimeoutError: If operation times out
        """
        operation_id = str(uuid4())
        start_time = time.time()

        # Use provided timeout or partition default
        op_timeout = timeout or self.config.timeout

        try:
            # Check if partition can accept new operations
            async with self._lock:
                current_active = len(self._active_operations)
                current_queued = self._operation_queue.qsize()

                # Reject if no queue capacity (queue_size=0) and at capacity
                if (
                    self.config.queue_size == 0
                    and current_active >= self.config.max_concurrent_operations
                ):
                    await self._record_rejection("no_queue_capacity")
                    raise BulkheadRejectionError(
                        f"Partition {self.config.name} has no queue capacity and is at max concurrent operations"
                    )

                # Reject if queue is full
                if self._operation_queue.full():
                    await self._record_rejection("queue_full")
                    raise BulkheadRejectionError(
                        f"Partition {self.config.name} queue is full"
                    )

            # Queue the operation
            await self._operation_queue.put((operation_id, func, args, kwargs))

            async with self._lock:
                self.metrics.queued_operations += 1

            # Execute with circuit breaker if enabled
            if self._circuit_breaker:
                result = await self._circuit_breaker.call(
                    self._execute_isolated, operation_id, func, args, kwargs, op_timeout
                )
            else:
                result = await self._execute_isolated(
                    operation_id, func, args, kwargs, op_timeout
                )

            execution_time = time.time() - start_time
            await self._record_success(execution_time)

            return result

        except Exception as e:
            execution_time = time.time() - start_time
            await self._record_failure(execution_time)
            raise
        finally:
            # Clean up
            async with self._lock:
                if operation_id in self._active_operations:
                    self._active_operations.remove(operation_id)
                    self.metrics.active_operations = len(self._active_operations)

    async def _execute_isolated(
        self, operation_id: str, func: Callable, args: tuple, kwargs: dict, timeout: int
    ) -> Any:
        """Execute operation with resource isolation."""
        # Acquire semaphore (limits concurrent operations)
        async with self._semaphore:
            async with self._lock:
                self._active_operations.add(operation_id)
                self.metrics.active_operations = len(self._active_operations)
                self.metrics.total_operations += 1
                self.metrics.last_activity = datetime.now(UTC)

            try:
                # Remove from queue
                await self._operation_queue.get()

                # Execute based on partition type
                if (
                    self.config.partition_type == PartitionType.CPU_BOUND
                    and self._thread_pool
                ):
                    # Run CPU-bound task in thread pool
                    loop = asyncio.get_event_loop()
                    result = await asyncio.wait_for(
                        loop.run_in_executor(self._thread_pool, func, *args),
                        timeout=timeout,
                    )
                else:
                    # Run I/O-bound or async task directly
                    if asyncio.iscoroutinefunction(func):
                        result = await asyncio.wait_for(
                            func(*args, **kwargs), timeout=timeout
                        )
                    else:
                        # Synchronous function
                        result = await asyncio.wait_for(
                            asyncio.to_thread(func, *args, **kwargs), timeout=timeout
                        )

                return result

            finally:
                async with self._lock:
                    self.metrics.queued_operations = max(
                        0, self.metrics.queued_operations - 1
                    )

    async def _record_success(self, execution_time: float):
        """Record successful operation."""
        async with self._lock:
            self.metrics.successful_operations += 1

            # Update execution time metrics
            total_ops = self.metrics.successful_operations
            current_avg = self.metrics.avg_execution_time
            self.metrics.avg_execution_time = (
                current_avg * (total_ops - 1) + execution_time
            ) / total_ops

            if execution_time > self.metrics.max_execution_time:
                self.metrics.max_execution_time = execution_time

    async def _record_failure(self, execution_time: float):
        """Record failed operation."""
        async with self._lock:
            self.metrics.failed_operations += 1

    async def _record_rejection(self, reason: str):
        """Record rejected operation."""
        async with self._lock:
            self.metrics.rejected_operations += 1

        logger.warning(
            f"Operation rejected from partition {self.config.name}: {reason}"
        )

    def get_status(self) -> Dict[str, Any]:
        """Get current partition status."""
        return {
            "name": self.config.name,
            "type": self.config.partition_type.value,
            "metrics": {
                "total_operations": self.metrics.total_operations,
                "successful_operations": self.metrics.successful_operations,
                "failed_operations": self.metrics.failed_operations,
                "rejected_operations": self.metrics.rejected_operations,
                "active_operations": self.metrics.active_operations,
                "queued_operations": self.metrics.queued_operations,
                "avg_execution_time": self.metrics.avg_execution_time,
                "max_execution_time": self.metrics.max_execution_time,
                "success_rate": (
                    self.metrics.successful_operations
                    / max(1, self.metrics.total_operations)
                ),
            },
            "config": {
                "max_concurrent_operations": self.config.max_concurrent_operations,
                "timeout": self.config.timeout,
                "priority": self.config.priority,
                "queue_size": self.config.queue_size,
            },
            "resources": {
                "semaphore_available": self._semaphore._value,
                "queue_size": self._operation_queue.qsize(),
                "thread_pool_active": (
                    self._thread_pool._threads if self._thread_pool else 0
                ),
            },
            "circuit_breaker": (
                self._circuit_breaker.get_status() if self._circuit_breaker else None
            ),
        }

    async def shutdown(self):
        """Shutdown partition and clean up resources."""
        logger.info(f"Shutting down bulkhead partition: {self.config.name}")

        if self._thread_pool:
            self._thread_pool.shutdown(wait=True)

        # Wait for active operations to complete (with timeout)
        timeout = 30  # seconds
        start_time = time.time()

        while self._active_operations and (time.time() - start_time) < timeout:
            await asyncio.sleep(0.1)

        if self._active_operations:
            logger.warning(
                f"Partition {self.config.name} shutdown with {len(self._active_operations)} "
                "active operations still running"
            )


class BulkheadRejectionError(Exception):
    """Raised when operation is rejected due to bulkhead limits."""

    pass


class BulkheadManager:
    """Manages multiple bulkhead partitions for operation isolation."""

    def __init__(self):
        """Initialize bulkhead manager."""
        self.partitions: Dict[str, BulkheadPartition] = {}
        self._lock = threading.Lock()

        # Create default partitions
        self._create_default_partitions()

        logger.info("Initialized BulkheadManager with default partitions")

    def _create_default_partitions(self):
        """Create default partitions for common operations."""
        default_configs = [
            PartitionConfig(
                name="critical",
                partition_type=PartitionType.CRITICAL,
                max_concurrent_operations=5,
                timeout=10,
                priority=10,
                queue_size=20,
            ),
            PartitionConfig(
                name="database",
                partition_type=PartitionType.IO_BOUND,
                max_concurrent_operations=20,
                max_connections=50,
                timeout=30,
                priority=5,
                queue_size=100,
            ),
            PartitionConfig(
                name="compute",
                partition_type=PartitionType.CPU_BOUND,
                max_concurrent_operations=5,
                max_threads=4,
                timeout=60,
                priority=3,
                queue_size=50,
            ),
            PartitionConfig(
                name="background",
                partition_type=PartitionType.BACKGROUND,
                max_concurrent_operations=10,
                timeout=120,
                priority=1,
                queue_size=200,
            ),
        ]

        for config in default_configs:
            self.partitions[config.name] = BulkheadPartition(config)

    def create_partition(self, config: PartitionConfig) -> BulkheadPartition:
        """Create a new bulkhead partition."""
        with self._lock:
            if config.name in self.partitions:
                raise ValueError(f"Partition {config.name} already exists")

            partition = BulkheadPartition(config)
            self.partitions[config.name] = partition

            logger.info(f"Created bulkhead partition: {config.name}")
            return partition

    def get_partition(self, name: str) -> BulkheadPartition:
        """Get partition by name."""
        if name not in self.partitions:
            raise ValueError(f"Partition {name} not found")
        return self.partitions[name]

    @asynccontextmanager
    async def isolated_execution(self, partition_name: str):
        """Context manager for isolated execution."""
        partition = self.get_partition(partition_name)
        try:
            yield partition
        finally:
            # Any cleanup can be done here
            pass

    def get_all_status(self) -> Dict[str, Dict[str, Any]]:
        """Get status of all partitions."""
        return {
            name: partition.get_status() for name, partition in self.partitions.items()
        }

    async def shutdown_all(self):
        """Shutdown all partitions."""
        logger.info("Shutting down all bulkhead partitions")

        # Shutdown all partitions concurrently
        shutdown_tasks = [
            partition.shutdown() for partition in self.partitions.values()
        ]

        await asyncio.gather(*shutdown_tasks, return_exceptions=True)

        self.partitions.clear()
        logger.info("All bulkhead partitions shut down")


# Global bulkhead manager instance
_bulkhead_manager: Optional[BulkheadManager] = None


def get_bulkhead_manager() -> BulkheadManager:
    """Get global bulkhead manager instance."""
    global _bulkhead_manager
    if _bulkhead_manager is None:
        _bulkhead_manager = BulkheadManager()
    return _bulkhead_manager


async def execute_with_bulkhead(
    partition_name: str, func: Callable, *args, **kwargs
) -> Any:
    """Convenience function to execute operation with bulkhead isolation."""
    manager = get_bulkhead_manager()
    partition = manager.get_partition(partition_name)
    return await partition.execute(func, *args, **kwargs)
```