kailash 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. kailash/__init__.py +3 -3
  2. kailash/api/custom_nodes_secure.py +3 -3
  3. kailash/api/gateway.py +1 -1
  4. kailash/api/studio.py +2 -3
  5. kailash/api/workflow_api.py +3 -4
  6. kailash/core/resilience/bulkhead.py +460 -0
  7. kailash/core/resilience/circuit_breaker.py +92 -10
  8. kailash/edge/discovery.py +86 -0
  9. kailash/mcp_server/__init__.py +334 -0
  10. kailash/mcp_server/advanced_features.py +1022 -0
  11. kailash/{mcp → mcp_server}/ai_registry_server.py +29 -4
  12. kailash/mcp_server/auth.py +789 -0
  13. kailash/mcp_server/client.py +712 -0
  14. kailash/mcp_server/discovery.py +1593 -0
  15. kailash/mcp_server/errors.py +673 -0
  16. kailash/mcp_server/oauth.py +1727 -0
  17. kailash/mcp_server/protocol.py +1126 -0
  18. kailash/mcp_server/registry_integration.py +587 -0
  19. kailash/mcp_server/server.py +1747 -0
  20. kailash/{mcp → mcp_server}/servers/ai_registry.py +2 -2
  21. kailash/mcp_server/transports.py +1169 -0
  22. kailash/mcp_server/utils/cache.py +510 -0
  23. kailash/middleware/auth/auth_manager.py +3 -3
  24. kailash/middleware/communication/api_gateway.py +2 -9
  25. kailash/middleware/communication/realtime.py +1 -1
  26. kailash/middleware/mcp/client_integration.py +1 -1
  27. kailash/middleware/mcp/enhanced_server.py +2 -2
  28. kailash/nodes/__init__.py +2 -0
  29. kailash/nodes/admin/audit_log.py +6 -6
  30. kailash/nodes/admin/permission_check.py +8 -8
  31. kailash/nodes/admin/role_management.py +32 -28
  32. kailash/nodes/admin/schema.sql +6 -1
  33. kailash/nodes/admin/schema_manager.py +13 -13
  34. kailash/nodes/admin/security_event.py +16 -20
  35. kailash/nodes/admin/tenant_isolation.py +3 -3
  36. kailash/nodes/admin/transaction_utils.py +3 -3
  37. kailash/nodes/admin/user_management.py +21 -22
  38. kailash/nodes/ai/a2a.py +11 -11
  39. kailash/nodes/ai/ai_providers.py +9 -12
  40. kailash/nodes/ai/embedding_generator.py +13 -14
  41. kailash/nodes/ai/intelligent_agent_orchestrator.py +19 -19
  42. kailash/nodes/ai/iterative_llm_agent.py +3 -3
  43. kailash/nodes/ai/llm_agent.py +213 -36
  44. kailash/nodes/ai/self_organizing.py +2 -2
  45. kailash/nodes/alerts/discord.py +4 -4
  46. kailash/nodes/api/graphql.py +6 -6
  47. kailash/nodes/api/http.py +12 -17
  48. kailash/nodes/api/rate_limiting.py +4 -4
  49. kailash/nodes/api/rest.py +15 -15
  50. kailash/nodes/auth/mfa.py +3 -4
  51. kailash/nodes/auth/risk_assessment.py +2 -2
  52. kailash/nodes/auth/session_management.py +5 -5
  53. kailash/nodes/auth/sso.py +143 -0
  54. kailash/nodes/base.py +6 -2
  55. kailash/nodes/base_async.py +16 -2
  56. kailash/nodes/base_with_acl.py +2 -2
  57. kailash/nodes/cache/__init__.py +9 -0
  58. kailash/nodes/cache/cache.py +1172 -0
  59. kailash/nodes/cache/cache_invalidation.py +870 -0
  60. kailash/nodes/cache/redis_pool_manager.py +595 -0
  61. kailash/nodes/code/async_python.py +2 -1
  62. kailash/nodes/code/python.py +196 -35
  63. kailash/nodes/compliance/data_retention.py +6 -6
  64. kailash/nodes/compliance/gdpr.py +5 -5
  65. kailash/nodes/data/__init__.py +10 -0
  66. kailash/nodes/data/optimistic_locking.py +906 -0
  67. kailash/nodes/data/readers.py +8 -8
  68. kailash/nodes/data/redis.py +349 -0
  69. kailash/nodes/data/sql.py +314 -3
  70. kailash/nodes/data/streaming.py +21 -0
  71. kailash/nodes/enterprise/__init__.py +8 -0
  72. kailash/nodes/enterprise/audit_logger.py +285 -0
  73. kailash/nodes/enterprise/batch_processor.py +22 -3
  74. kailash/nodes/enterprise/data_lineage.py +1 -1
  75. kailash/nodes/enterprise/mcp_executor.py +205 -0
  76. kailash/nodes/enterprise/service_discovery.py +150 -0
  77. kailash/nodes/enterprise/tenant_assignment.py +108 -0
  78. kailash/nodes/logic/async_operations.py +2 -2
  79. kailash/nodes/logic/convergence.py +1 -1
  80. kailash/nodes/logic/operations.py +1 -1
  81. kailash/nodes/monitoring/__init__.py +11 -1
  82. kailash/nodes/monitoring/health_check.py +456 -0
  83. kailash/nodes/monitoring/log_processor.py +817 -0
  84. kailash/nodes/monitoring/metrics_collector.py +627 -0
  85. kailash/nodes/monitoring/performance_benchmark.py +137 -11
  86. kailash/nodes/rag/advanced.py +7 -7
  87. kailash/nodes/rag/agentic.py +49 -2
  88. kailash/nodes/rag/conversational.py +3 -3
  89. kailash/nodes/rag/evaluation.py +3 -3
  90. kailash/nodes/rag/federated.py +3 -3
  91. kailash/nodes/rag/graph.py +3 -3
  92. kailash/nodes/rag/multimodal.py +3 -3
  93. kailash/nodes/rag/optimized.py +5 -5
  94. kailash/nodes/rag/privacy.py +3 -3
  95. kailash/nodes/rag/query_processing.py +6 -6
  96. kailash/nodes/rag/realtime.py +1 -1
  97. kailash/nodes/rag/registry.py +2 -6
  98. kailash/nodes/rag/router.py +1 -1
  99. kailash/nodes/rag/similarity.py +7 -7
  100. kailash/nodes/rag/strategies.py +4 -4
  101. kailash/nodes/security/abac_evaluator.py +6 -6
  102. kailash/nodes/security/behavior_analysis.py +5 -6
  103. kailash/nodes/security/credential_manager.py +1 -1
  104. kailash/nodes/security/rotating_credentials.py +11 -11
  105. kailash/nodes/security/threat_detection.py +8 -8
  106. kailash/nodes/testing/credential_testing.py +2 -2
  107. kailash/nodes/transform/processors.py +5 -5
  108. kailash/runtime/local.py +162 -14
  109. kailash/runtime/parameter_injection.py +425 -0
  110. kailash/runtime/parameter_injector.py +657 -0
  111. kailash/runtime/testing.py +2 -2
  112. kailash/testing/fixtures.py +2 -2
  113. kailash/workflow/builder.py +99 -18
  114. kailash/workflow/builder_improvements.py +207 -0
  115. kailash/workflow/input_handling.py +170 -0
  116. {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/METADATA +21 -8
  117. {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/RECORD +126 -101
  118. kailash/mcp/__init__.py +0 -53
  119. kailash/mcp/client.py +0 -445
  120. kailash/mcp/server.py +0 -292
  121. kailash/mcp/server_enhanced.py +0 -449
  122. kailash/mcp/utils/cache.py +0 -267
  123. /kailash/{mcp → mcp_server}/client_new.py +0 -0
  124. /kailash/{mcp → mcp_server}/utils/__init__.py +0 -0
  125. /kailash/{mcp → mcp_server}/utils/config.py +0 -0
  126. /kailash/{mcp → mcp_server}/utils/formatters.py +0 -0
  127. /kailash/{mcp → mcp_server}/utils/metrics.py +0 -0
  128. {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/WHEEL +0 -0
  129. {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/entry_points.txt +0 -0
  130. {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/licenses/LICENSE +0 -0
  131. {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,673 @@
1
+ """Enhanced error handling for MCP implementations.
2
+
3
+ This module provides structured error codes, error recovery strategies,
4
+ and enhanced error handling capabilities that build on top of the
5
+ official MCP SDK error handling.
6
+
7
+ Features:
8
+ - Structured error codes following MCP protocol
9
+ - Error recovery and retry strategies
10
+ - Circuit breaker patterns
11
+ - Error aggregation and reporting
12
+ - Graceful degradation mechanisms
13
+
14
+ Examples:
15
+ Structured error handling:
16
+
17
+ >>> try:
18
+ ... result = await client.call_tool("search", {"query": "test"})
19
+ ... except MCPError as e:
20
+ ... if e.is_retryable():
21
+ ... await asyncio.sleep(e.get_retry_delay())
22
+ ... # retry logic
23
+ ... else:
24
+ ... logger.error(f"Non-retryable error: {e}")
25
+
26
+ Error recovery with circuit breaker:
27
+
28
+ >>> breaker = CircuitBreakerRetry(failure_threshold=5, timeout=60)
29
+ >>> retry_op = RetryableOperation(breaker)
30
+ >>> result = await retry_op.execute(risky_operation)
31
+ """
32
+
33
+ import asyncio
34
+ import json
35
+ import logging
36
+ import time
37
+ from abc import ABC, abstractmethod
38
+ from enum import Enum
39
+ from typing import Any, Dict, List, Optional, Type, Union
40
+
41
+ logger = logging.getLogger(__name__)
42
+
43
+
44
class MCPErrorCode(Enum):
    """Numeric error codes attached to MCP errors.

    Negative values follow JSON-RPC conventions: first the predefined
    protocol errors, then MCP-specific codes placed in the reserved
    range -32099 to -32000. Positive values are application-level codes.
    """

    # --- Standard JSON-RPC errors ---
    PARSE_ERROR = -32700
    INVALID_REQUEST = -32600
    METHOD_NOT_FOUND = -32601
    INVALID_PARAMS = -32602
    INTERNAL_ERROR = -32603

    # --- MCP-specific errors (reserved range -32099 to -32000) ---
    TRANSPORT_ERROR = -32001
    AUTHENTICATION_FAILED = -32002
    AUTHORIZATION_FAILED = -32003
    RATE_LIMITED = -32004
    TOOL_NOT_FOUND = -32005
    TOOL_EXECUTION_FAILED = -32006
    RESOURCE_NOT_FOUND = -32007
    RESOURCE_ACCESS_FAILED = -32008
    SERVER_UNAVAILABLE = -32009
    PROTOCOL_VERSION_MISMATCH = -32010
    CAPABILITY_NOT_SUPPORTED = -32011
    SESSION_EXPIRED = -32012
    CIRCUIT_BREAKER_OPEN = -32013

    # --- Application-specific errors (positive codes) ---
    VALIDATION_ERROR = 1001
    BUSINESS_LOGIC_ERROR = 1002
    EXTERNAL_SERVICE_ERROR = 1003
    DATA_INTEGRITY_ERROR = 1004
    QUOTA_EXCEEDED = 1005
    REQUEST_TIMEOUT = 1006
    REQUEST_CANCELLED = 1007
77
+
78
+
79
class MCPError(Exception):
    """Exception carrying a structured MCP error code and retry metadata.

    Args:
        message: Human-readable error message.
        error_code: An ``MCPErrorCode`` member, or an integer that is
            converted with ``MCPErrorCode(error_code)``.
        data: Additional error data; a falsy value is stored as ``{}``.
        retryable: Whether the error is retryable.
        retry_after: Suggested retry delay in seconds.
        cause: Underlying exception, stored on ``self.cause``.

    Examples:
        Create structured error:

        >>> error = MCPError(
        ...     "Tool execution failed",
        ...     error_code=MCPErrorCode.TOOL_EXECUTION_FAILED,
        ...     data={"tool": "search", "reason": "timeout"},
        ...     retryable=True,
        ...     retry_after=5
        ... )
    """

    def __init__(
        self,
        message: str,
        error_code: Union[MCPErrorCode, int] = MCPErrorCode.INTERNAL_ERROR,
        data: Optional[Dict[str, Any]] = None,
        retryable: bool = False,
        retry_after: Optional[float] = None,
        cause: Optional[Exception] = None,
    ):
        """Store the message, normalized error code and retry metadata."""
        super().__init__(message)
        self.message = message
        # Accept raw integers by mapping them onto the enum.
        if not isinstance(error_code, MCPErrorCode):
            error_code = MCPErrorCode(error_code)
        self.error_code = error_code
        self.data = data or {}
        self.retryable = retryable
        self.retry_after = retry_after
        self.cause = cause
        # Creation time; used by time-window based reporting.
        self.timestamp = time.time()

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-RPC style error object.

        The ``data`` key is present only when ``self.data`` is non-empty.
        """
        payload = {"code": self.error_code.value, "message": self.message}
        if self.data:
            payload["data"] = self.data
        return payload

    def is_retryable(self) -> bool:
        """Return the ``retryable`` flag given at construction."""
        return self.retryable

    def get_retry_delay(self) -> float:
        """Return the delay, in seconds, to wait before retrying.

        An explicit ``retry_after`` wins; otherwise a per-code default is
        used, falling back to 1.0 second.
        """
        if self.retry_after is not None:
            return self.retry_after

        defaults_by_code = {
            MCPErrorCode.RATE_LIMITED: 60.0,
            MCPErrorCode.SERVER_UNAVAILABLE: 30.0,
            MCPErrorCode.TRANSPORT_ERROR: 5.0,
            MCPErrorCode.TOOL_EXECUTION_FAILED: 2.0,
            MCPErrorCode.EXTERNAL_SERVICE_ERROR: 10.0,
        }
        if self.error_code in defaults_by_code:
            return defaults_by_code[self.error_code]
        return 1.0

    def get_severity(self) -> str:
        """Classify the error code as ``"high"``, ``"medium"`` or ``"low"``."""
        code = self.error_code

        if code in (
            MCPErrorCode.AUTHENTICATION_FAILED,
            MCPErrorCode.AUTHORIZATION_FAILED,
            MCPErrorCode.DATA_INTEGRITY_ERROR,
            MCPErrorCode.PROTOCOL_VERSION_MISMATCH,
        ):
            return "high"

        if code in (
            MCPErrorCode.TOOL_NOT_FOUND,
            MCPErrorCode.RESOURCE_NOT_FOUND,
            MCPErrorCode.VALIDATION_ERROR,
            MCPErrorCode.BUSINESS_LOGIC_ERROR,
        ):
            return "medium"

        return "low"
178
+
179
+
180
class TransportError(MCPError):
    """Transport-related error.

    Defaults to ``MCPErrorCode.TRANSPORT_ERROR`` and ``retryable=True``
    unless overridden through ``kwargs``. ``transport_type`` is recorded
    under ``data["transport_type"]``.
    """

    def __init__(self, message: str, transport_type: str = "unknown", **kwargs):
        kwargs.setdefault("error_code", MCPErrorCode.TRANSPORT_ERROR)
        kwargs.setdefault("retryable", True)
        # Build a fresh dict: tolerates an explicit ``data=None`` and avoids
        # mutating a dict owned by the caller.
        data = dict(kwargs.get("data") or {})
        data["transport_type"] = transport_type
        kwargs["data"] = data
        super().__init__(message, **kwargs)
188
+
189
+
190
class AuthenticationError(MCPError):
    """Authentication-related error.

    Defaults to ``MCPErrorCode.AUTHENTICATION_FAILED`` and
    ``retryable=False`` unless overridden through ``kwargs``. ``auth_type``
    is recorded under ``data["auth_type"]``.
    """

    def __init__(self, message: str, auth_type: str = "unknown", **kwargs):
        kwargs.setdefault("error_code", MCPErrorCode.AUTHENTICATION_FAILED)
        kwargs.setdefault("retryable", False)
        # Build a fresh dict: tolerates an explicit ``data=None`` and avoids
        # mutating a dict owned by the caller.
        data = dict(kwargs.get("data") or {})
        data["auth_type"] = auth_type
        kwargs["data"] = data
        super().__init__(message, **kwargs)
198
+
199
+
200
class AuthorizationError(MCPError):
    """Authorization-related error.

    Defaults to ``MCPErrorCode.AUTHORIZATION_FAILED`` and
    ``retryable=False`` unless overridden through ``kwargs``.
    ``required_permission`` is recorded under
    ``data["required_permission"]``.
    """

    def __init__(self, message: str, required_permission: str = "", **kwargs):
        kwargs.setdefault("error_code", MCPErrorCode.AUTHORIZATION_FAILED)
        kwargs.setdefault("retryable", False)
        # Build a fresh dict: tolerates an explicit ``data=None`` and avoids
        # mutating a dict owned by the caller.
        data = dict(kwargs.get("data") or {})
        data["required_permission"] = required_permission
        kwargs["data"] = data
        super().__init__(message, **kwargs)
208
+
209
+
210
class RateLimitError(MCPError):
    """Rate limiting error.

    Defaults to ``MCPErrorCode.RATE_LIMITED`` and ``retryable=True`` unless
    overridden through ``kwargs``; ``retry_after`` is always forwarded.
    """

    def __init__(self, message: str, retry_after: float = 60.0, **kwargs):
        # Start from the class defaults, then let caller-supplied options win.
        options = {"error_code": MCPErrorCode.RATE_LIMITED, "retryable": True}
        options.update(kwargs)
        options["retry_after"] = retry_after
        super().__init__(message, **options)
218
+
219
+
220
class ToolError(MCPError):
    """Tool-related error.

    Defaults to ``MCPErrorCode.TOOL_EXECUTION_FAILED`` and
    ``retryable=True`` unless overridden through ``kwargs``. ``tool_name``
    is recorded under ``data["tool_name"]``.
    """

    def __init__(self, message: str, tool_name: str = "", **kwargs):
        kwargs.setdefault("error_code", MCPErrorCode.TOOL_EXECUTION_FAILED)
        kwargs.setdefault("retryable", True)
        # Build a fresh dict: tolerates an explicit ``data=None`` and avoids
        # mutating a dict owned by the caller.
        data = dict(kwargs.get("data") or {})
        data["tool_name"] = tool_name
        kwargs["data"] = data
        super().__init__(message, **kwargs)
228
+
229
+
230
class ResourceError(MCPError):
    """Resource-related error.

    Defaults to ``MCPErrorCode.RESOURCE_ACCESS_FAILED`` and
    ``retryable=True`` unless overridden through ``kwargs``.
    ``resource_uri`` is recorded under ``data["resource_uri"]``.
    """

    def __init__(self, message: str, resource_uri: str = "", **kwargs):
        kwargs.setdefault("error_code", MCPErrorCode.RESOURCE_ACCESS_FAILED)
        kwargs.setdefault("retryable", True)
        # Build a fresh dict: tolerates an explicit ``data=None`` and avoids
        # mutating a dict owned by the caller.
        data = dict(kwargs.get("data") or {})
        data["resource_uri"] = resource_uri
        kwargs["data"] = data
        super().__init__(message, **kwargs)
238
+
239
+
240
class ServiceDiscoveryError(MCPError):
    """Service discovery related error.

    Defaults to ``MCPErrorCode.SERVER_UNAVAILABLE`` and ``retryable=True``
    unless overridden through ``kwargs``. ``discovery_type`` is recorded
    under ``data["discovery_type"]``.
    """

    def __init__(self, message: str, discovery_type: str = "unknown", **kwargs):
        kwargs.setdefault("error_code", MCPErrorCode.SERVER_UNAVAILABLE)
        kwargs.setdefault("retryable", True)
        # Build a fresh dict: tolerates an explicit ``data=None`` and avoids
        # mutating a dict owned by the caller.
        data = dict(kwargs.get("data") or {})
        data["discovery_type"] = discovery_type
        kwargs["data"] = data
        super().__init__(message, **kwargs)
248
+
249
+
250
class ValidationError(MCPError):
    """Validation error.

    Defaults to ``MCPErrorCode.VALIDATION_ERROR`` and ``retryable=False``
    unless overridden through ``kwargs``.
    """

    def __init__(self, message: str, **kwargs):
        # Class defaults first; caller-supplied options take precedence.
        options = {"error_code": MCPErrorCode.VALIDATION_ERROR, "retryable": False}
        options.update(kwargs)
        super().__init__(message, **options)
257
+
258
+
259
class RetryStrategy(ABC):
    """Interface for retry policies.

    Implementations decide whether a failed operation is attempted again
    and how long to wait before the next attempt.
    """

    @abstractmethod
    def should_retry(self, error: MCPError, attempt: int) -> bool:
        """Return whether the operation should be retried after ``error``.

        Args:
            error: The error raised by the failed attempt.
            attempt: Attempt number supplied by the caller.
        """

    @abstractmethod
    def get_delay(self, error: MCPError, attempt: int) -> float:
        """Return the delay, in seconds, before the next retry attempt.

        Args:
            error: The error raised by the failed attempt.
            attempt: Attempt number supplied by the caller.
        """
271
+
272
+
273
class ExponentialBackoffRetry(RetryStrategy):
    """Retry policy whose delay grows geometrically with the attempt number.

    Args:
        max_attempts: Maximum retry attempts
        base_delay: Base delay in seconds
        max_delay: Maximum delay in seconds
        backoff_factor: Exponential backoff factor
        jitter: Add random jitter to prevent thundering herd

    Examples:
        Create retry strategy:

        >>> retry = ExponentialBackoffRetry(
        ...     max_attempts=5,
        ...     base_delay=1.0,
        ...     max_delay=60.0,
        ...     backoff_factor=2.0,
        ...     jitter=True
        ... )
    """

    def __init__(
        self,
        max_attempts: int = 3,
        base_delay: float = 1.0,
        max_delay: float = 60.0,
        backoff_factor: float = 2.0,
        jitter: bool = True,
    ):
        """Store the backoff configuration."""
        self.max_attempts = max_attempts
        self.base_delay = base_delay
        self.max_delay = max_delay
        self.backoff_factor = backoff_factor
        self.jitter = jitter

    def should_retry(self, error: MCPError, attempt: int) -> bool:
        """Retry only while attempts remain, for retryable non-high-severity errors."""
        attempts_remain = attempt < self.max_attempts
        return (
            attempts_remain
            and error.is_retryable()
            and error.get_severity() != "high"
        )

    def get_delay(self, error: MCPError, attempt: int) -> float:
        """Return the wait before the next attempt, in seconds.

        The error's own ``retry_after`` takes precedence over the computed
        backoff; either value is capped at ``max_delay``. With jitter
        enabled the capped value is scaled by a random factor in
        [0.5, 1.0).
        """
        suggested = error.retry_after
        uncapped = (
            suggested
            if suggested is not None
            else self.base_delay * (self.backoff_factor ** (attempt - 1))
        )
        capped = min(uncapped, self.max_delay)

        if not self.jitter:
            return capped

        import random

        return capped * (0.5 + random.random() * 0.5)
336
+
337
+
338
class CircuitBreakerRetry(RetryStrategy):
    """Circuit breaker retry strategy.

    Tracks failures and moves between three states: ``"closed"`` (normal),
    ``"open"`` (retries refused until ``timeout`` seconds have passed since
    the last failure) and ``"half-open"`` (trial period after the timeout).

    Args:
        failure_threshold: Number of failures before opening circuit
        timeout: Time to wait before trying to close circuit
        success_threshold: Number of successes needed to close circuit

    Examples:
        Create circuit breaker:

        >>> circuit_breaker = CircuitBreakerRetry(
        ...     failure_threshold=5,
        ...     timeout=60.0,
        ...     success_threshold=3
        ... )
    """

    def __init__(
        self,
        failure_threshold: int = 5,
        timeout: float = 60.0,
        success_threshold: int = 3,
    ):
        """Initialize circuit breaker in the closed state."""
        self.failure_threshold = failure_threshold
        self.timeout = timeout
        self.success_threshold = success_threshold

        # Circuit breaker state
        self.failure_count = 0
        self.success_count = 0
        self.last_failure_time = 0
        self.state = "closed"  # closed, open, half-open

    def should_retry(self, error: MCPError, attempt: int) -> bool:
        """Check if operation should be retried based on circuit state.

        In the open state this also performs the open -> half-open
        transition once ``timeout`` seconds have elapsed since the last
        recorded failure.
        """
        now = time.time()

        if self.state == "open":
            # Check if timeout has passed
            if now - self.last_failure_time > self.timeout:
                self.state = "half-open"
                self.success_count = 0
                return True
            return False

        if self.state == "half-open":
            # Allow limited retries to test if service recovered
            return self.success_count < self.success_threshold

        # closed
        return error.is_retryable()

    def get_delay(self, error: MCPError, attempt: int) -> float:
        """Get delay based on circuit state.

        While open, returns the time remaining until the timeout expires,
        never less than zero; otherwise defers to the error's own delay.
        """
        if self.state == "open":
            remaining = self.timeout - (time.time() - self.last_failure_time)
            # Once the timeout has elapsed the difference goes negative;
            # a delay must not be negative.
            return max(0.0, remaining)
        return error.get_retry_delay()

    def on_success(self):
        """Record successful operation.

        Only counted while half-open; reaching ``success_threshold``
        closes the circuit and clears the failure count.
        """
        if self.state == "half-open":
            self.success_count += 1
            if self.success_count >= self.success_threshold:
                self.state = "closed"
                self.failure_count = 0

    def on_failure(self, error: MCPError):
        """Record failed operation.

        Any failure while half-open reopens the circuit; otherwise the
        circuit opens once ``failure_threshold`` failures are recorded.
        """
        self.failure_count += 1
        self.last_failure_time = time.time()

        if self.state == "half-open":
            self.state = "open"
        elif self.failure_count >= self.failure_threshold:
            self.state = "open"
419
+
420
+
421
class RetryableOperation:
    """Wrapper for operations with retry logic.

    Args:
        retry_strategy: Retry strategy to use
        logger: Optional logger for retry events

    Examples:
        Execute operation with retries:

        >>> retry_op = RetryableOperation(
        ...     ExponentialBackoffRetry(max_attempts=5)
        ... )
        >>> result = await retry_op.execute(risky_function, arg1, arg2)
    """

    def __init__(
        self, retry_strategy: RetryStrategy, logger: Optional[logging.Logger] = None
    ):
        """Initialize retryable operation."""
        self.retry_strategy = retry_strategy
        self.logger = logger or logging.getLogger(__name__)

    async def execute(self, func, *args, **kwargs):
        """Execute function with retry logic.

        ``MCPError`` failures are passed to the retry strategy, which
        decides whether to retry and how long to sleep first. When the
        strategy is a ``CircuitBreakerRetry``, each success and each
        ``MCPError`` failure is also reported to it. Any other exception is
        not retried; it is wrapped in a non-retryable ``MCPError``.

        Args:
            func: Function to execute (can be sync or async)
            *args: Function arguments
            **kwargs: Function keyword arguments

        Returns:
            Function result

        Raises:
            MCPError: If the strategy declines another attempt, or an
                unexpected exception occurred (chained as ``__cause__``).
        """
        attempt = 0

        while True:
            attempt += 1

            try:
                # Execute function (handle both sync and async)
                if asyncio.iscoroutinefunction(func):
                    result = await func(*args, **kwargs)
                else:
                    result = func(*args, **kwargs)

                # Record success for circuit breaker
                if isinstance(self.retry_strategy, CircuitBreakerRetry):
                    self.retry_strategy.on_success()

                return result

            except MCPError as error:
                # Record failure for circuit breaker
                if isinstance(self.retry_strategy, CircuitBreakerRetry):
                    self.retry_strategy.on_failure(error)

                # Check if we should retry
                if not self.retry_strategy.should_retry(error, attempt):
                    self.logger.error(
                        "Operation failed after %s attempts: %s", attempt, error
                    )
                    raise

                # Calculate retry delay
                delay = self.retry_strategy.get_delay(error, attempt)
                self.logger.warning(
                    "Operation failed (attempt %s), retrying in %.2fs: %s",
                    attempt,
                    delay,
                    error,
                )

                # Wait before retry
                await asyncio.sleep(delay)

            except Exception as error:
                # Convert unexpected errors to MCPError, keeping the original
                # exception explicitly chained for tracebacks.
                self.logger.error(
                    "Unexpected error in retryable operation: %s", error
                )
                raise MCPError(
                    f"Unexpected error: {error}",
                    error_code=MCPErrorCode.INTERNAL_ERROR,
                    cause=error,
                    retryable=False,
                ) from error
510
+
511
+
512
class ErrorAggregator:
    """Aggregates and reports errors for monitoring.

    Keeps the most recent ``max_errors`` error objects for window and
    trend queries, plus a cumulative per-code counter that is not trimmed.

    Examples:
        Track errors:

        >>> aggregator = ErrorAggregator()
        >>> aggregator.record_error(error)
        >>> stats = aggregator.get_error_stats()
    """

    def __init__(self, max_errors: int = 1000):
        """Initialize error aggregator.

        Args:
            max_errors: Maximum number of recent errors retained in
                ``self.errors``.
        """
        self.max_errors = max_errors
        self.errors: List[MCPError] = []
        self.error_counts: Dict[MCPErrorCode, int] = {}

    def record_error(self, error: "MCPError"):
        """Record an error occurrence.

        Appends the error to the history, trims the history to
        ``max_errors`` entries, and increments the counter for its code.
        """
        self.errors.append(error)

        # Keep only recent errors. Computing the overflow explicitly also
        # handles max_errors == 0, where a ``[-0:]`` slice would keep all.
        overflow = len(self.errors) - self.max_errors
        if overflow > 0:
            self.errors = self.errors[overflow:]

        # Update counts
        self.error_counts[error.error_code] = (
            self.error_counts.get(error.error_code, 0) + 1
        )

    def get_error_stats(self, time_window: Optional[float] = None) -> Dict[str, Any]:
        """Get error statistics.

        Args:
            time_window: Time window in seconds (None for all errors)

        Returns:
            ``{"total_errors": 0}`` when no error matches; otherwise a
            dictionary with totals, per-code and per-severity counts, the
            most common ``(code, count)`` pair, the number of retryable
            errors and the requested window. ``error_rate`` is errors per
            second over the window, using 3600 seconds when no window is
            given and never dividing by less than one second.
        """
        from collections import Counter

        now = time.time()

        # Filter errors by time window; ``0`` is a real (empty) window,
        # only ``None`` means "no filtering".
        if time_window is not None:
            recent_errors = [e for e in self.errors if now - e.timestamp <= time_window]
        else:
            recent_errors = self.errors

        if not recent_errors:
            return {"total_errors": 0}

        code_counts = Counter(e.error_code for e in recent_errors)
        severity_counts = Counter(e.get_severity() for e in recent_errors)
        window_seconds = 3600 if time_window is None else time_window

        return {
            "total_errors": len(recent_errors),
            "error_rate": len(recent_errors) / max(window_seconds, 1),  # per second
            "error_codes": dict(code_counts),
            "severity_levels": dict(severity_counts),
            "most_common_error": code_counts.most_common(1)[0],
            "retryable_errors": sum(1 for e in recent_errors if e.is_retryable()),
            "time_window": time_window,
        }

    def get_error_trends(self, bucket_size: float = 300) -> List[Dict[str, Any]]:
        """Get error trends over time.

        Args:
            bucket_size: Time bucket size in seconds; must be positive.

        Returns:
            List of consecutive time buckets, starting at the oldest
            retained error, each with its start/end time, error count and
            the distinct error codes seen in it. Empty when no errors are
            retained.

        Raises:
            ValueError: If errors are retained and ``bucket_size`` is not
                positive (the bucket walk could never advance).
        """
        if not self.errors:
            return []
        if bucket_size <= 0:
            raise ValueError(f"bucket_size must be positive, got {bucket_size}")

        timestamps = [e.timestamp for e in self.errors]
        # Walk up to the current time, or the newest error if its
        # timestamp is not strictly in the past, so every retained error
        # lands in some bucket.
        horizon = max(time.time(), max(timestamps))

        # Create time buckets
        buckets = []
        bucket_start = min(timestamps)

        while bucket_start <= horizon:
            bucket_end = bucket_start + bucket_size
            bucket_errors = [
                e for e in self.errors if bucket_start <= e.timestamp < bucket_end
            ]

            buckets.append(
                {
                    "start_time": bucket_start,
                    "end_time": bucket_end,
                    "error_count": len(bucket_errors),
                    "error_codes": list({e.error_code for e in bucket_errors}),
                }
            )

            bucket_start = bucket_end

        return buckets
621
+
622
+
623
+ # Convenience functions
624
def create_retry_operation(
    strategy: str = "exponential", **strategy_kwargs
) -> RetryableOperation:
    """Build a ``RetryableOperation`` around a named retry strategy.

    Args:
        strategy: Strategy type ("exponential" or "circuit_breaker")
        **strategy_kwargs: Forwarded to the strategy's constructor

    Returns:
        RetryableOperation instance

    Raises:
        ValueError: If ``strategy`` is not one of the known names.
    """
    if strategy == "exponential":
        return RetryableOperation(ExponentialBackoffRetry(**strategy_kwargs))

    if strategy == "circuit_breaker":
        return RetryableOperation(CircuitBreakerRetry(**strategy_kwargs))

    raise ValueError(f"Unknown retry strategy: {strategy}")
644
+
645
+
646
def wrap_with_error_handling(func):
    """Decorator to wrap functions with MCP error handling.

    The returned wrapper is always a coroutine function: it awaits ``func``
    when ``func`` is a coroutine function and calls it directly otherwise.
    ``MCPError`` exceptions propagate unchanged; any other ``Exception`` is
    converted into a retryable ``MCPError`` with code ``INTERNAL_ERROR``,
    with the original exception chained as its cause. The wrapper keeps
    the wrapped function's name and docstring.

    Examples:
        >>> @wrap_with_error_handling
        ... async def risky_operation():
        ...     # This might fail
        ...     return "success"
    """
    # Local import so this block does not depend on module-level imports.
    import functools

    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        try:
            if asyncio.iscoroutinefunction(func):
                return await func(*args, **kwargs)
            return func(*args, **kwargs)
        except MCPError:
            raise  # Re-raise MCP errors as-is
        except Exception as e:
            # Convert to MCP error, keeping the original as __cause__
            raise MCPError(
                f"Operation failed: {e}",
                error_code=MCPErrorCode.INTERNAL_ERROR,
                cause=e,
                retryable=True,
            ) from e

    return wrapper