kailash 0.6.3__py3-none-any.whl → 0.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. kailash/__init__.py +3 -3
  2. kailash/api/custom_nodes_secure.py +3 -3
  3. kailash/api/gateway.py +1 -1
  4. kailash/api/studio.py +2 -3
  5. kailash/api/workflow_api.py +3 -4
  6. kailash/core/resilience/bulkhead.py +460 -0
  7. kailash/core/resilience/circuit_breaker.py +92 -10
  8. kailash/edge/discovery.py +86 -0
  9. kailash/mcp_server/__init__.py +309 -33
  10. kailash/mcp_server/advanced_features.py +1022 -0
  11. kailash/mcp_server/ai_registry_server.py +27 -2
  12. kailash/mcp_server/auth.py +789 -0
  13. kailash/mcp_server/client.py +645 -378
  14. kailash/mcp_server/discovery.py +1593 -0
  15. kailash/mcp_server/errors.py +673 -0
  16. kailash/mcp_server/oauth.py +1727 -0
  17. kailash/mcp_server/protocol.py +1126 -0
  18. kailash/mcp_server/registry_integration.py +587 -0
  19. kailash/mcp_server/server.py +1213 -98
  20. kailash/mcp_server/transports.py +1169 -0
  21. kailash/mcp_server/utils/__init__.py +6 -1
  22. kailash/mcp_server/utils/cache.py +250 -7
  23. kailash/middleware/auth/auth_manager.py +3 -3
  24. kailash/middleware/communication/api_gateway.py +2 -9
  25. kailash/middleware/communication/realtime.py +1 -1
  26. kailash/middleware/mcp/enhanced_server.py +1 -1
  27. kailash/nodes/__init__.py +2 -0
  28. kailash/nodes/admin/audit_log.py +6 -6
  29. kailash/nodes/admin/permission_check.py +8 -8
  30. kailash/nodes/admin/role_management.py +32 -28
  31. kailash/nodes/admin/schema.sql +6 -1
  32. kailash/nodes/admin/schema_manager.py +13 -13
  33. kailash/nodes/admin/security_event.py +16 -20
  34. kailash/nodes/admin/tenant_isolation.py +3 -3
  35. kailash/nodes/admin/transaction_utils.py +3 -3
  36. kailash/nodes/admin/user_management.py +21 -22
  37. kailash/nodes/ai/a2a.py +11 -11
  38. kailash/nodes/ai/ai_providers.py +9 -12
  39. kailash/nodes/ai/embedding_generator.py +13 -14
  40. kailash/nodes/ai/intelligent_agent_orchestrator.py +19 -19
  41. kailash/nodes/ai/iterative_llm_agent.py +2 -2
  42. kailash/nodes/ai/llm_agent.py +210 -33
  43. kailash/nodes/ai/self_organizing.py +2 -2
  44. kailash/nodes/alerts/discord.py +4 -4
  45. kailash/nodes/api/graphql.py +6 -6
  46. kailash/nodes/api/http.py +12 -17
  47. kailash/nodes/api/rate_limiting.py +4 -4
  48. kailash/nodes/api/rest.py +15 -15
  49. kailash/nodes/auth/mfa.py +3 -4
  50. kailash/nodes/auth/risk_assessment.py +2 -2
  51. kailash/nodes/auth/session_management.py +5 -5
  52. kailash/nodes/auth/sso.py +143 -0
  53. kailash/nodes/base.py +6 -2
  54. kailash/nodes/base_async.py +16 -2
  55. kailash/nodes/base_with_acl.py +2 -2
  56. kailash/nodes/cache/__init__.py +9 -0
  57. kailash/nodes/cache/cache.py +1172 -0
  58. kailash/nodes/cache/cache_invalidation.py +870 -0
  59. kailash/nodes/cache/redis_pool_manager.py +595 -0
  60. kailash/nodes/code/async_python.py +2 -1
  61. kailash/nodes/code/python.py +196 -35
  62. kailash/nodes/compliance/data_retention.py +6 -6
  63. kailash/nodes/compliance/gdpr.py +5 -5
  64. kailash/nodes/data/__init__.py +10 -0
  65. kailash/nodes/data/optimistic_locking.py +906 -0
  66. kailash/nodes/data/readers.py +8 -8
  67. kailash/nodes/data/redis.py +349 -0
  68. kailash/nodes/data/sql.py +314 -3
  69. kailash/nodes/data/streaming.py +21 -0
  70. kailash/nodes/enterprise/__init__.py +8 -0
  71. kailash/nodes/enterprise/audit_logger.py +285 -0
  72. kailash/nodes/enterprise/batch_processor.py +22 -3
  73. kailash/nodes/enterprise/data_lineage.py +1 -1
  74. kailash/nodes/enterprise/mcp_executor.py +205 -0
  75. kailash/nodes/enterprise/service_discovery.py +150 -0
  76. kailash/nodes/enterprise/tenant_assignment.py +108 -0
  77. kailash/nodes/logic/async_operations.py +2 -2
  78. kailash/nodes/logic/convergence.py +1 -1
  79. kailash/nodes/logic/operations.py +1 -1
  80. kailash/nodes/monitoring/__init__.py +11 -1
  81. kailash/nodes/monitoring/health_check.py +456 -0
  82. kailash/nodes/monitoring/log_processor.py +817 -0
  83. kailash/nodes/monitoring/metrics_collector.py +627 -0
  84. kailash/nodes/monitoring/performance_benchmark.py +137 -11
  85. kailash/nodes/rag/advanced.py +7 -7
  86. kailash/nodes/rag/agentic.py +49 -2
  87. kailash/nodes/rag/conversational.py +3 -3
  88. kailash/nodes/rag/evaluation.py +3 -3
  89. kailash/nodes/rag/federated.py +3 -3
  90. kailash/nodes/rag/graph.py +3 -3
  91. kailash/nodes/rag/multimodal.py +3 -3
  92. kailash/nodes/rag/optimized.py +5 -5
  93. kailash/nodes/rag/privacy.py +3 -3
  94. kailash/nodes/rag/query_processing.py +6 -6
  95. kailash/nodes/rag/realtime.py +1 -1
  96. kailash/nodes/rag/registry.py +2 -6
  97. kailash/nodes/rag/router.py +1 -1
  98. kailash/nodes/rag/similarity.py +7 -7
  99. kailash/nodes/rag/strategies.py +4 -4
  100. kailash/nodes/security/abac_evaluator.py +6 -6
  101. kailash/nodes/security/behavior_analysis.py +5 -6
  102. kailash/nodes/security/credential_manager.py +1 -1
  103. kailash/nodes/security/rotating_credentials.py +11 -11
  104. kailash/nodes/security/threat_detection.py +8 -8
  105. kailash/nodes/testing/credential_testing.py +2 -2
  106. kailash/nodes/transform/processors.py +5 -5
  107. kailash/runtime/local.py +162 -14
  108. kailash/runtime/parameter_injection.py +425 -0
  109. kailash/runtime/parameter_injector.py +657 -0
  110. kailash/runtime/testing.py +2 -2
  111. kailash/testing/fixtures.py +2 -2
  112. kailash/workflow/builder.py +99 -18
  113. kailash/workflow/builder_improvements.py +207 -0
  114. kailash/workflow/input_handling.py +170 -0
  115. {kailash-0.6.3.dist-info → kailash-0.6.4.dist-info}/METADATA +22 -9
  116. {kailash-0.6.3.dist-info → kailash-0.6.4.dist-info}/RECORD +120 -94
  117. {kailash-0.6.3.dist-info → kailash-0.6.4.dist-info}/WHEEL +0 -0
  118. {kailash-0.6.3.dist-info → kailash-0.6.4.dist-info}/entry_points.txt +0 -0
  119. {kailash-0.6.3.dist-info → kailash-0.6.4.dist-info}/licenses/LICENSE +0 -0
  120. {kailash-0.6.3.dist-info → kailash-0.6.4.dist-info}/top_level.txt +0 -0
kailash/__init__.py CHANGED
@@ -3,8 +3,8 @@
  The Kailash SDK provides a comprehensive framework for creating nodes and workflows
  that align with container-node architecture while allowing rapid prototyping.
 
- New in v0.4.2: Bug fixes including circular import resolution and JWT implementation
- consolidation. Improved changelog organization with individual release files.
+ New in v0.6.4: Enterprise-grade parameter injection system, comprehensive E2E test
+ improvements achieving 100% pass rate, and enhanced documentation based on test findings.
  """
 
  from kailash.nodes.base import Node, NodeMetadata, NodeParameter
@@ -33,7 +33,7 @@ except ImportError:
  # For backward compatibility
  WorkflowGraph = Workflow
 
- __version__ = "0.6.3"
+ __version__ = "0.6.4"
 
  __all__ = [
      # Core workflow components
kailash/api/custom_nodes_secure.py CHANGED
@@ -317,7 +317,7 @@ async def _execute_python_node(
      )
 
      # Run the node
-     result = python_node.run(**test_data)
+     result = python_node.execute(**test_data)
 
      return result
 
@@ -369,9 +369,9 @@ async def _execute_api_node(
      # Prepare request data
      if api_config.get("method") in ["POST", "PUT", "PATCH"]:
          # Include test data in body
-         result = await http_node.run(json_data=test_data)
+         result = await http_node.execute(json_data=test_data)
      else:
          # Include test data as query params
-         result = await http_node.run(params=test_data)
+         result = await http_node.execute(params=test_data)
 
      return result
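
Both hunks above are instances of the SDK-wide rename of `run(...)` to `execute(...)` on node instances; the same rename appears in gateway.py, studio.py, and workflow_api.py below. A minimal sketch of the new calling convention, using a hypothetical stand-in class rather than a real kailash node:

# Hypothetical stand-in for a kailash node; only the calling convention
# (keyword arguments into execute()) mirrors the hunks above.
class EchoNode:
    def execute(self, **inputs):
        return {"result": inputs}

node = EchoNode()
print(node.execute(value=42))  # 0.6.3-era call sites used node.run(value=42)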
kailash/api/gateway.py CHANGED
@@ -29,7 +29,7 @@ Example:
      >>> gateway.register_workflow("analytics", analytics_workflow)
 
      >>> # Start server
-     >>> gateway.run(port=8000)  # doctest: +SKIP
+     >>> gateway.execute(port=8000)  # doctest: +SKIP
 
      >>> # With MCP integration
      >>> from kailash.api.mcp_integration import MCPIntegration
kailash/api/studio.py CHANGED
@@ -20,14 +20,13 @@ from typing import Any
  import uvicorn
  from fastapi import FastAPI, HTTPException, Query, WebSocket, WebSocketDisconnect
  from fastapi.middleware.cors import CORSMiddleware
- from pydantic import BaseModel, Field
-
  from kailash.nodes.base import NodeRegistry
  from kailash.runtime.local import LocalRuntime
  from kailash.tracking.manager import TaskManager
  from kailash.tracking.storage.filesystem import FileSystemStorage
  from kailash.utils.export import export_workflow
  from kailash.workflow import Workflow
+ from pydantic import BaseModel, Field
 
  from .custom_nodes import setup_custom_node_routes
  from .database import (
@@ -908,7 +907,7 @@ def main():
 
      # Create and run API
      api = WorkflowStudioAPI(tenant_id=args.tenant_id)
-     api.run(host=args.host, port=args.port)
+     api.execute(host=args.host, port=args.port)
 
 
  if __name__ == "__main__":
kailash/api/workflow_api.py CHANGED
@@ -13,11 +13,10 @@ from typing import Any
  import uvicorn
  from fastapi import BackgroundTasks, FastAPI, HTTPException
  from fastapi.responses import StreamingResponse
- from pydantic import BaseModel, Field
-
  from kailash.runtime.local import LocalRuntime
  from kailash.workflow.builder import WorkflowBuilder
  from kailash.workflow.graph import Workflow
+ from pydantic import BaseModel, Field
 
 
  class ExecutionMode(str, Enum):
@@ -58,7 +57,7 @@ class WorkflowAPI:
      >>> # For any workflow
      >>> from my_workflows import rag_workflow
      >>> api = WorkflowAPI(rag_workflow)
-     >>> api.run(port=8000)
+     >>> api.execute(port=8000)
      """
 
      def __init__(
@@ -388,7 +387,7 @@ def create_workflow_api(
 
      Example:
          >>> api = create_workflow_api(my_workflow, api_type="rag")
-         >>> api.run(port=8000)
+         >>> api.execute(port=8000)
      """
      api_classes = {
          "generic": WorkflowAPI,
kailash/core/resilience/bulkhead.py ADDED
@@ -0,0 +1,460 @@
+ """Bulkhead isolation pattern implementation for operation segregation.
+ 
+ This module implements the Bulkhead pattern to isolate different types of
+ operations with separate resource pools, preventing resource exhaustion
+ in one area from affecting other operations.
+ 
+ The bulkhead provides:
+ - Resource pool isolation by operation type
+ - Thread pool management for CPU-bound tasks
+ - Connection pool management for I/O operations
+ - Priority-based resource allocation
+ - Real-time monitoring and metrics
+ 
+ Example:
+     >>> bulkhead = BulkheadManager()
+     >>>
+     >>> # Execute with isolation
+     >>> async with bulkhead.get_partition("critical_operations") as partition:
+     ...     result = await partition.execute(critical_task)
+ """
+ 
+ import asyncio
+ import logging
+ import threading
+ import time
+ from concurrent.futures import ThreadPoolExecutor
+ from contextlib import asynccontextmanager
+ from dataclasses import dataclass, field
+ from datetime import UTC, datetime
+ from enum import Enum
+ from typing import Any, Callable, Dict, List, Optional, Set, Union
+ from uuid import uuid4
+ 
+ logger = logging.getLogger(__name__)
+ 
+ 
+ class PartitionType(Enum):
+     """Types of bulkhead partitions."""
+ 
+     CPU_BOUND = "cpu_bound"    # For CPU-intensive operations
+     IO_BOUND = "io_bound"      # For I/O operations
+     CRITICAL = "critical"      # For critical high-priority operations
+     BACKGROUND = "background"  # For background/batch operations
+     CUSTOM = "custom"          # Custom partition types
+ 
+ 
+ class ResourceType(Enum):
+     """Types of resources managed by bulkhead."""
+ 
+     THREADS = "threads"
+     CONNECTIONS = "connections"
+     MEMORY = "memory"
+     SEMAPHORE = "semaphore"
+ 
+ 
+ @dataclass
+ class PartitionConfig:
+     """Configuration for a bulkhead partition."""
+ 
+     name: str
+     partition_type: PartitionType
+     max_concurrent_operations: int = 10
+     max_threads: Optional[int] = None      # For CPU-bound partitions
+     max_connections: Optional[int] = None  # For I/O partitions
+     timeout: int = 30                      # Operation timeout in seconds
+     priority: int = 1                      # Higher number = higher priority
+     queue_size: int = 100                  # Max queued operations
+     isolation_level: str = "strict"        # strict, relaxed, shared
+     circuit_breaker_enabled: bool = True
+     metrics_enabled: bool = True
+     resource_limits: Dict[ResourceType, int] = field(default_factory=dict)
+ 
+ 
+ @dataclass
+ class PartitionMetrics:
+     """Metrics for a bulkhead partition."""
+ 
+     total_operations: int = 0
+     successful_operations: int = 0
+     failed_operations: int = 0
+     rejected_operations: int = 0
+     queued_operations: int = 0
+     active_operations: int = 0
+     avg_execution_time: float = 0.0
+     max_execution_time: float = 0.0
+     resource_utilization: Dict[ResourceType, float] = field(default_factory=dict)
+     last_activity: Optional[datetime] = None
+     created_at: datetime = field(default_factory=lambda: datetime.now(UTC))
+ 
+ 
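Since PartitionConfig is a plain dataclass, a custom partition can be described declaratively and handed to the manager defined later in this file. A minimal sketch using only the fields above; the partition name and values are illustrative:

from kailash.core.resilience.bulkhead import PartitionConfig, PartitionType, ResourceType

# Illustrative I/O-bound partition, e.g. for outbound HTTP calls.
http_config = PartitionConfig(
    name="external_http",
    partition_type=PartitionType.IO_BOUND,
    max_concurrent_operations=15,
    max_connections=30,   # only meaningful for I/O partitions
    timeout=20,           # seconds per operation
    priority=4,           # higher number = higher priority
    queue_size=80,
    resource_limits={ResourceType.CONNECTIONS: 30},
)
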
+ class BulkheadPartition:
+     """Individual partition within the bulkhead for operation isolation."""
+ 
+     def __init__(self, config: PartitionConfig):
+         """Initialize bulkhead partition."""
+         self.config = config
+         self.metrics = PartitionMetrics()
+         self._lock = asyncio.Lock()
+ 
+         # Resource management
+         self._semaphore = asyncio.Semaphore(config.max_concurrent_operations)
+         self._thread_pool: Optional[ThreadPoolExecutor] = None
+         self._active_operations: Set[str] = set()
+         self._operation_queue: asyncio.Queue = asyncio.Queue(maxsize=config.queue_size)
+ 
+         # Initialize thread pool for CPU-bound operations
+         if config.partition_type == PartitionType.CPU_BOUND and config.max_threads:
+             self._thread_pool = ThreadPoolExecutor(
+                 max_workers=config.max_threads,
+                 thread_name_prefix=f"bulkhead-{config.name}",
+             )
+ 
+         # Circuit breaker integration
+         self._circuit_breaker = None
+         if config.circuit_breaker_enabled:
+             from kailash.core.resilience.circuit_breaker import (
+                 CircuitBreakerConfig,
+                 ConnectionCircuitBreaker,
+             )
+ 
+             breaker_config = CircuitBreakerConfig(
+                 failure_threshold=5, recovery_timeout=30
+             )
+             self._circuit_breaker = ConnectionCircuitBreaker(breaker_config)
+ 
+         logger.info(f"Initialized bulkhead partition: {config.name}")
+ 
+     async def execute(
+         self,
+         func: Callable,
+         *args,
+         priority: Optional[int] = None,
+         timeout: Optional[int] = None,
+         **kwargs,
+     ) -> Any:
+         """Execute operation within partition isolation.
+ 
+         Args:
+             func: Function to execute
+             *args: Function arguments
+             priority: Operation priority (overrides partition default)
+             timeout: Operation timeout (overrides partition default)
+             **kwargs: Function keyword arguments
+ 
+         Returns:
+             Function result
+ 
+         Raises:
+             BulkheadRejectionError: If partition is overloaded
+             asyncio.TimeoutError: If operation times out
+         """
+         operation_id = str(uuid4())
+         start_time = time.time()
+ 
+         # Use provided timeout or partition default
+         op_timeout = timeout or self.config.timeout
+ 
+         try:
+             # Check queue capacity
+             if self._operation_queue.full():
+                 await self._record_rejection("queue_full")
+                 raise BulkheadRejectionError(
+                     f"Partition {self.config.name} queue is full"
+                 )
+ 
+             # Queue the operation
+             await self._operation_queue.put((operation_id, func, args, kwargs))
+ 
+             async with self._lock:
+                 self.metrics.queued_operations += 1
+ 
+             # Execute with circuit breaker if enabled
+             if self._circuit_breaker:
+                 result = await self._circuit_breaker.call(
+                     self._execute_isolated, operation_id, func, args, kwargs, op_timeout
+                 )
+             else:
+                 result = await self._execute_isolated(
+                     operation_id, func, args, kwargs, op_timeout
+                 )
+ 
+             execution_time = time.time() - start_time
+             await self._record_success(execution_time)
+ 
+             return result
+ 
+         except Exception as e:
+             execution_time = time.time() - start_time
+             await self._record_failure(execution_time)
+             raise
+         finally:
+             # Clean up
+             async with self._lock:
+                 if operation_id in self._active_operations:
+                     self._active_operations.remove(operation_id)
+                     self.metrics.active_operations = len(self._active_operations)
+ 
+     async def _execute_isolated(
+         self, operation_id: str, func: Callable, args: tuple, kwargs: dict, timeout: int
+     ) -> Any:
+         """Execute operation with resource isolation."""
+         # Acquire semaphore (limits concurrent operations)
+         async with self._semaphore:
+             async with self._lock:
+                 self._active_operations.add(operation_id)
+                 self.metrics.active_operations = len(self._active_operations)
+                 self.metrics.total_operations += 1
+                 self.metrics.last_activity = datetime.now(UTC)
+ 
+             try:
+                 # Remove from queue
+                 await self._operation_queue.get()
+ 
+                 # Execute based on partition type
+                 if (
+                     self.config.partition_type == PartitionType.CPU_BOUND
+                     and self._thread_pool
+                 ):
+                     # Run CPU-bound task in thread pool
+                     loop = asyncio.get_event_loop()
+                     result = await asyncio.wait_for(
+                         loop.run_in_executor(self._thread_pool, func, *args),
+                         timeout=timeout,
+                     )
+                 else:
+                     # Run I/O-bound or async task directly
+                     if asyncio.iscoroutinefunction(func):
+                         result = await asyncio.wait_for(
+                             func(*args, **kwargs), timeout=timeout
+                         )
+                     else:
+                         # Synchronous function
+                         result = await asyncio.wait_for(
+                             asyncio.to_thread(func, *args, **kwargs), timeout=timeout
+                         )
+ 
+                 return result
+ 
+             finally:
+                 async with self._lock:
+                     self.metrics.queued_operations = max(
+                         0, self.metrics.queued_operations - 1
+                     )
+ 
+     async def _record_success(self, execution_time: float):
+         """Record successful operation."""
+         async with self._lock:
+             self.metrics.successful_operations += 1
+ 
+             # Update execution time metrics
+             total_ops = self.metrics.successful_operations
+             current_avg = self.metrics.avg_execution_time
+             self.metrics.avg_execution_time = (
+                 current_avg * (total_ops - 1) + execution_time
+             ) / total_ops
+ 
+             if execution_time > self.metrics.max_execution_time:
+                 self.metrics.max_execution_time = execution_time
+ 
+     async def _record_failure(self, execution_time: float):
+         """Record failed operation."""
+         async with self._lock:
+             self.metrics.failed_operations += 1
+ 
+     async def _record_rejection(self, reason: str):
+         """Record rejected operation."""
+         async with self._lock:
+             self.metrics.rejected_operations += 1
+ 
+         logger.warning(
+             f"Operation rejected from partition {self.config.name}: {reason}"
+         )
+ 
+     def get_status(self) -> Dict[str, Any]:
+         """Get current partition status."""
+         return {
+             "name": self.config.name,
+             "type": self.config.partition_type.value,
+             "metrics": {
+                 "total_operations": self.metrics.total_operations,
+                 "successful_operations": self.metrics.successful_operations,
+                 "failed_operations": self.metrics.failed_operations,
+                 "rejected_operations": self.metrics.rejected_operations,
+                 "active_operations": self.metrics.active_operations,
+                 "queued_operations": self.metrics.queued_operations,
+                 "avg_execution_time": self.metrics.avg_execution_time,
+                 "max_execution_time": self.metrics.max_execution_time,
+                 "success_rate": (
+                     self.metrics.successful_operations
+                     / max(1, self.metrics.total_operations)
+                 ),
+             },
+             "config": {
+                 "max_concurrent_operations": self.config.max_concurrent_operations,
+                 "timeout": self.config.timeout,
+                 "priority": self.config.priority,
+                 "queue_size": self.config.queue_size,
+             },
+             "resources": {
+                 "semaphore_available": self._semaphore._value,
+                 "queue_size": self._operation_queue.qsize(),
+                 "thread_pool_active": (
+                     self._thread_pool._threads if self._thread_pool else 0
+                 ),
+             },
+             "circuit_breaker": (
+                 self._circuit_breaker.get_status() if self._circuit_breaker else None
+             ),
+         }
+ 
+     async def shutdown(self):
+         """Shutdown partition and clean up resources."""
+         logger.info(f"Shutting down bulkhead partition: {self.config.name}")
+ 
+         if self._thread_pool:
+             self._thread_pool.shutdown(wait=True)
+ 
+         # Wait for active operations to complete (with timeout)
+         timeout = 30  # seconds
+         start_time = time.time()
+ 
+         while self._active_operations and (time.time() - start_time) < timeout:
+             await asyncio.sleep(0.1)
+ 
+         if self._active_operations:
+             logger.warning(
+                 f"Partition {self.config.name} shutdown with {len(self._active_operations)} "
+                 "active operations still running"
+             )
+ 
+ 
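A partition can also be used standalone, without the BulkheadManager defined below. A sketch reusing the illustrative `http_config` from the PartitionConfig example above; the coroutine is a placeholder:

import asyncio

from kailash.core.resilience.bulkhead import BulkheadPartition

async def ping() -> str:
    await asyncio.sleep(0.01)  # placeholder I/O-bound work
    return "pong"

async def main() -> None:
    partition = BulkheadPartition(http_config)  # http_config from the sketch above
    result = await partition.execute(ping)      # queued, semaphore-limited, 20s timeout
    print(result, partition.get_status()["metrics"]["success_rate"])
    await partition.shutdown()

asyncio.run(main())
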
+ class BulkheadRejectionError(Exception):
+     """Raised when operation is rejected due to bulkhead limits."""
+ 
+     pass
+ 
+ 
+ class BulkheadManager:
+     """Manages multiple bulkhead partitions for operation isolation."""
+ 
+     def __init__(self):
+         """Initialize bulkhead manager."""
+         self.partitions: Dict[str, BulkheadPartition] = {}
+         self._lock = threading.Lock()
+ 
+         # Create default partitions
+         self._create_default_partitions()
+ 
+         logger.info("Initialized BulkheadManager with default partitions")
+ 
+     def _create_default_partitions(self):
+         """Create default partitions for common operations."""
+         default_configs = [
+             PartitionConfig(
+                 name="critical",
+                 partition_type=PartitionType.CRITICAL,
+                 max_concurrent_operations=5,
+                 timeout=10,
+                 priority=10,
+                 queue_size=20,
+             ),
+             PartitionConfig(
+                 name="database",
+                 partition_type=PartitionType.IO_BOUND,
+                 max_concurrent_operations=20,
+                 max_connections=50,
+                 timeout=30,
+                 priority=5,
+                 queue_size=100,
+             ),
+             PartitionConfig(
+                 name="compute",
+                 partition_type=PartitionType.CPU_BOUND,
+                 max_concurrent_operations=5,
+                 max_threads=4,
+                 timeout=60,
+                 priority=3,
+                 queue_size=50,
+             ),
+             PartitionConfig(
+                 name="background",
+                 partition_type=PartitionType.BACKGROUND,
+                 max_concurrent_operations=10,
+                 timeout=120,
+                 priority=1,
+                 queue_size=200,
+             ),
+         ]
+ 
+         for config in default_configs:
+             self.partitions[config.name] = BulkheadPartition(config)
+ 
+     def create_partition(self, config: PartitionConfig) -> BulkheadPartition:
+         """Create a new bulkhead partition."""
+         with self._lock:
+             if config.name in self.partitions:
+                 raise ValueError(f"Partition {config.name} already exists")
+ 
+             partition = BulkheadPartition(config)
+             self.partitions[config.name] = partition
+ 
+             logger.info(f"Created bulkhead partition: {config.name}")
+             return partition
+ 
+     def get_partition(self, name: str) -> BulkheadPartition:
+         """Get partition by name."""
+         if name not in self.partitions:
+             raise ValueError(f"Partition {name} not found")
+         return self.partitions[name]
+ 
+     @asynccontextmanager
+     async def isolated_execution(self, partition_name: str):
+         """Context manager for isolated execution."""
+         partition = self.get_partition(partition_name)
+         try:
+             yield partition
+         finally:
+             # Any cleanup can be done here
+             pass
+ 
+     def get_all_status(self) -> Dict[str, Dict[str, Any]]:
+         """Get status of all partitions."""
+         return {
+             name: partition.get_status() for name, partition in self.partitions.items()
+         }
+ 
+     async def shutdown_all(self):
+         """Shutdown all partitions."""
+         logger.info("Shutting down all bulkhead partitions")
+ 
+         # Shutdown all partitions concurrently
+         shutdown_tasks = [
+             partition.shutdown() for partition in self.partitions.values()
+         ]
+ 
+         await asyncio.gather(*shutdown_tasks, return_exceptions=True)
+ 
+         self.partitions.clear()
+         logger.info("All bulkhead partitions shut down")
+ 
+ 
+ # Global bulkhead manager instance
+ _bulkhead_manager: Optional[BulkheadManager] = None
+ 
+ 
+ def get_bulkhead_manager() -> BulkheadManager:
+     """Get global bulkhead manager instance."""
+     global _bulkhead_manager
+     if _bulkhead_manager is None:
+         _bulkhead_manager = BulkheadManager()
+     return _bulkhead_manager
+ 
+ 
+ async def execute_with_bulkhead(
+     partition_name: str, func: Callable, *args, **kwargs
+ ) -> Any:
+     """Convenience function to execute operation with bulkhead isolation."""
+     manager = get_bulkhead_manager()
+     partition = manager.get_partition(partition_name)
+     return await partition.execute(func, *args, **kwargs)
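
End-to-end, the module offers two entry points: `execute_with_bulkhead`, which routes through the lazily created global manager, and `BulkheadManager.isolated_execution`, which is the actual async context manager (`get_partition` returns the partition object directly, so the module docstring's `async with bulkhead.get_partition(...)` form does not match the implementation). A usage sketch against the default partitions created in `_create_default_partitions`; both task functions are placeholders:

import asyncio

from kailash.core.resilience.bulkhead import BulkheadManager, execute_with_bulkhead

async def fetch_user(user_id: int) -> dict:
    await asyncio.sleep(0.05)  # placeholder for a real database call
    return {"id": user_id}

def checksum(data: bytes) -> int:
    return sum(data) % 256  # placeholder CPU-bound work

async def main() -> None:
    # Convenience path: global manager, default "database" partition.
    user = await execute_with_bulkhead("database", fetch_user, 42)

    # Explicit manager with the isolated_execution context manager;
    # "compute" is CPU_BOUND, so checksum runs in the partition's thread pool.
    manager = BulkheadManager()
    async with manager.isolated_execution("compute") as partition:
        digest = await partition.execute(checksum, b"payload")

    print(user, digest)
    print(manager.get_all_status()["compute"]["metrics"])
    await manager.shutdown_all()

asyncio.run(main())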