kailash 0.6.5__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. kailash/__init__.py +35 -4
  2. kailash/adapters/__init__.py +5 -0
  3. kailash/adapters/mcp_platform_adapter.py +273 -0
  4. kailash/channels/__init__.py +21 -0
  5. kailash/channels/api_channel.py +409 -0
  6. kailash/channels/base.py +271 -0
  7. kailash/channels/cli_channel.py +661 -0
  8. kailash/channels/event_router.py +496 -0
  9. kailash/channels/mcp_channel.py +648 -0
  10. kailash/channels/session.py +423 -0
  11. kailash/mcp_server/discovery.py +1 -1
  12. kailash/middleware/core/agent_ui.py +5 -0
  13. kailash/middleware/mcp/enhanced_server.py +22 -16
  14. kailash/nexus/__init__.py +21 -0
  15. kailash/nexus/factory.py +413 -0
  16. kailash/nexus/gateway.py +545 -0
  17. kailash/nodes/__init__.py +2 -0
  18. kailash/nodes/ai/iterative_llm_agent.py +988 -17
  19. kailash/nodes/ai/llm_agent.py +29 -9
  20. kailash/nodes/api/__init__.py +2 -2
  21. kailash/nodes/api/monitoring.py +1 -1
  22. kailash/nodes/base_async.py +54 -14
  23. kailash/nodes/code/async_python.py +1 -1
  24. kailash/nodes/data/bulk_operations.py +939 -0
  25. kailash/nodes/data/query_builder.py +373 -0
  26. kailash/nodes/data/query_cache.py +512 -0
  27. kailash/nodes/monitoring/__init__.py +10 -0
  28. kailash/nodes/monitoring/deadlock_detector.py +964 -0
  29. kailash/nodes/monitoring/performance_anomaly.py +1078 -0
  30. kailash/nodes/monitoring/race_condition_detector.py +1151 -0
  31. kailash/nodes/monitoring/transaction_metrics.py +790 -0
  32. kailash/nodes/monitoring/transaction_monitor.py +931 -0
  33. kailash/nodes/system/__init__.py +17 -0
  34. kailash/nodes/system/command_parser.py +820 -0
  35. kailash/nodes/transaction/__init__.py +48 -0
  36. kailash/nodes/transaction/distributed_transaction_manager.py +983 -0
  37. kailash/nodes/transaction/saga_coordinator.py +652 -0
  38. kailash/nodes/transaction/saga_state_storage.py +411 -0
  39. kailash/nodes/transaction/saga_step.py +467 -0
  40. kailash/nodes/transaction/transaction_context.py +756 -0
  41. kailash/nodes/transaction/two_phase_commit.py +978 -0
  42. kailash/nodes/transform/processors.py +17 -1
  43. kailash/nodes/validation/__init__.py +21 -0
  44. kailash/nodes/validation/test_executor.py +532 -0
  45. kailash/nodes/validation/validation_nodes.py +447 -0
  46. kailash/resources/factory.py +1 -1
  47. kailash/runtime/async_local.py +84 -21
  48. kailash/runtime/local.py +21 -2
  49. kailash/runtime/parameter_injector.py +187 -31
  50. kailash/security.py +16 -1
  51. kailash/servers/__init__.py +32 -0
  52. kailash/servers/durable_workflow_server.py +430 -0
  53. kailash/servers/enterprise_workflow_server.py +466 -0
  54. kailash/servers/gateway.py +183 -0
  55. kailash/servers/workflow_server.py +290 -0
  56. kailash/utils/data_validation.py +192 -0
  57. kailash/workflow/builder.py +291 -12
  58. kailash/workflow/validation.py +144 -8
  59. {kailash-0.6.5.dist-info → kailash-0.7.0.dist-info}/METADATA +1 -1
  60. {kailash-0.6.5.dist-info → kailash-0.7.0.dist-info}/RECORD +64 -26
  61. {kailash-0.6.5.dist-info → kailash-0.7.0.dist-info}/WHEEL +0 -0
  62. {kailash-0.6.5.dist-info → kailash-0.7.0.dist-info}/entry_points.txt +0 -0
  63. {kailash-0.6.5.dist-info → kailash-0.7.0.dist-info}/licenses/LICENSE +0 -0
  64. {kailash-0.6.5.dist-info → kailash-0.7.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,964 @@
1
+ """Deadlock detection and resolution node for database operations.
2
+
3
+ This module provides comprehensive deadlock detection capabilities with
4
+ graph-based analysis, automatic resolution strategies, and detailed reporting.
5
+ """
6
+
7
+ import asyncio
8
+ import logging
9
+ import time
10
+ from collections import defaultdict, deque
11
+ from dataclasses import dataclass, field
12
+ from datetime import UTC, datetime
13
+ from enum import Enum
14
+ from typing import Any, Dict, List, Optional, Set, Tuple
15
+
16
+ from kailash.nodes.base import NodeParameter, register_node
17
+ from kailash.nodes.base_async import AsyncNode
18
+ from kailash.sdk_exceptions import NodeExecutionError
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class DeadlockType(Enum):
    """Types of deadlocks that can be detected."""

    RESOURCE_LOCK = "resource_lock"
    # Used for deadlocks built from a cycle found in the wait-for graph.
    WAIT_FOR_GRAPH = "wait_for_graph"
    # Used when waits have outlived their configured timeout.
    TIMEOUT_INFERRED = "timeout_inferred"
    CIRCULAR_DEPENDENCY = "circular_dependency"
30
+
31
+
32
class ResolutionStrategy(Enum):
    """Deadlock resolution strategies.

    The string values are the ones accepted by the ``resolution_strategy``
    parameter of the detector node.
    """

    # Abort a single victim transaction.
    VICTIM_SELECTION = "victim_selection"
    # Release every transaction involved in the deadlock.
    TIMEOUT_ROLLBACK = "timeout_rollback"
    # Recommended when any wait in the chain carries a non-zero priority.
    PRIORITY_BASED = "priority_based"
    # Recommended when any wait in the chain carries a non-zero cost.
    COST_BASED = "cost_based"
    # No automatic release is performed by the resolver for this value.
    MANUAL = "manual"
40
+
41
+
42
@dataclass
class ResourceLock:
    """Represents a resource lock in the system.

    Instances are created by the detector node when a lock is registered
    and stored in its table of active locks.
    """

    resource_id: str  # identifier of the locked resource (table, row, etc.)
    lock_type: str  # shared, exclusive, update
    holder_transaction_id: str  # transaction that holds the lock
    requested_at: float  # time.time() value when the lock was requested
    granted_at: Optional[float] = None  # time.time() value when granted, if recorded
    timeout: Optional[float] = None  # lock timeout in seconds, if any
    metadata: Dict[str, Any] = field(default_factory=dict)  # free-form caller data
53
+
54
+
55
@dataclass
class TransactionWait:
    """Represents a transaction waiting for a resource.

    Each instance is one edge of the wait-for graph:
    ``transaction_id`` waits for ``waiting_for_transaction_id``.
    """

    transaction_id: str  # the waiting transaction
    waiting_for_transaction_id: str  # the transaction being waited on
    resource_id: str  # contested resource; the node stores "unknown" when none is given
    wait_start_time: float  # time.time() value when the wait was registered
    timeout: Optional[float] = None  # seconds after which the wait counts as timed out
    priority: int = 0  # lower priority = better victim candidate
    cost: float = 0.0  # lower cost = better victim candidate
    metadata: Dict[str, Any] = field(default_factory=dict)  # free-form caller data
67
+
68
+
69
@dataclass
class DeadlockDetection:
    """Represents a detected deadlock.

    Produced by the cycle-based and timeout-based detectors; the victim
    candidates and recommended strategy are filled in afterwards by the
    detection operation.
    """

    detection_id: str  # identifier used to look the deadlock up for resolution
    deadlock_type: DeadlockType  # how the deadlock was detected
    involved_transactions: List[str]  # transaction ids taking part in the deadlock
    involved_resources: List[str]  # resource ids taken from the wait chain
    detection_time: float  # time.time() value at detection
    wait_chain: List[TransactionWait]  # waits that make up the deadlock
    victim_candidates: List[str] = field(default_factory=list)  # transactions proposed for abort
    recommended_strategy: Optional[ResolutionStrategy] = None  # set after detection
    metadata: Dict[str, Any] = field(default_factory=dict)  # detector details; resolution info is added here
82
+
83
+
84
+ @register_node()
85
+ class DeadlockDetectorNode(AsyncNode):
86
+ """Node for detecting and resolving database deadlocks.
87
+
88
+ This node provides comprehensive deadlock detection including:
89
+ - Graph-based cycle detection in wait-for graphs
90
+ - Timeout-based deadlock inference
91
+ - Victim selection with multiple strategies
92
+ - Automatic deadlock resolution
93
+ - Detailed deadlock reporting and analysis
94
+ - Integration with database transaction monitoring
95
+
96
+ Design Purpose:
97
+ - Prevent and resolve database deadlocks in production
98
+ - Provide actionable insights for deadlock prevention
99
+ - Support multiple resolution strategies
100
+ - Enable proactive deadlock monitoring
101
+
102
+ Examples:
103
+ >>> # Register active transaction locks
104
+ >>> detector = DeadlockDetectorNode()
105
+ >>> result = await detector.execute(
106
+ ... operation="register_lock",
107
+ ... transaction_id="txn_123",
108
+ ... resource_id="table_orders",
109
+ ... lock_type="exclusive"
110
+ ... )
111
+
112
+ >>> # Register transaction wait
113
+ >>> result = await detector.execute(
114
+ ... operation="register_wait",
115
+ ... transaction_id="txn_456",
116
+ ... waiting_for_transaction_id="txn_123",
117
+ ... resource_id="table_orders"
118
+ ... )
119
+
120
+ >>> # Detect deadlocks
121
+ >>> result = await detector.execute(
122
+ ... operation="detect_deadlocks",
123
+ ... detection_algorithm="wait_for_graph"
124
+ ... )
125
+ """
126
+
127
def __init__(self, **kwargs):
    """Create the node with empty lock, wait and detection bookkeeping.

    All keyword arguments are forwarded unchanged to the base node
    constructor.
    """
    super().__init__(**kwargs)

    # Locks are keyed "<transaction_id>:<resource_id>", waits are keyed
    # "<transaction_id>:<waiting_for_transaction_id>".
    self._active_locks: Dict[str, ResourceLock] = {}
    self._active_waits: Dict[str, TransactionWait] = {}

    # Indexes over the active locks.
    self._resource_holders: Dict[str, str] = {}
    self._transaction_resources: Dict[str, Set[str]] = defaultdict(set)

    # Waiting transaction -> set of transactions it is waiting for.
    self._wait_for_graph: Dict[str, Set[str]] = defaultdict(set)

    # Results of detection runs.
    self._detection_history: List[Dict[str, Any]] = []
    self._detected_deadlocks: List[DeadlockDetection] = []

    # Background monitoring state.
    self._background_tasks: Set[asyncio.Task] = set()
    self._monitoring_active = False

    self.logger.info(f"Initialized DeadlockDetectorNode: {self.id}")
140
+
141
def get_parameters(self) -> Dict[str, NodeParameter]:
    """Return the input parameters accepted by this node, keyed by name.

    Only ``operation`` is required; every other parameter is optional and
    is consumed by whichever operation needs it.
    """
    # One row per parameter:
    # (name, type, required, description, extra NodeParameter keywords)
    rows = [
        (
            "operation",
            str,
            True,
            "Operation (register_lock, register_wait, detect_deadlocks, resolve_deadlock, get_status)",
            {},
        ),
        ("transaction_id", str, False, "Transaction identifier", {}),
        ("resource_id", str, False, "Resource identifier (table, row, etc.)", {}),
        (
            "lock_type",
            str,
            False,
            "Type of lock (shared, exclusive, update)",
            {"default": "exclusive"},
        ),
        (
            "waiting_for_transaction_id",
            str,
            False,
            "Transaction ID that this transaction is waiting for",
            {},
        ),
        ("timeout", float, False, "Timeout for lock or wait in seconds", {}),
        (
            "priority",
            int,
            False,
            "Transaction priority for victim selection",
            {"default": 0},
        ),
        (
            "cost",
            float,
            False,
            "Transaction cost for victim selection",
            {"default": 0.0},
        ),
        (
            "detection_algorithm",
            str,
            False,
            "Detection algorithm (wait_for_graph, timeout_based, combined)",
            {"default": "wait_for_graph"},
        ),
        (
            "resolution_strategy",
            str,
            False,
            "Resolution strategy (victim_selection, timeout_rollback, priority_based, cost_based)",
            {"default": "victim_selection"},
        ),
        ("deadlock_id", str, False, "Deadlock detection ID for resolution", {}),
        (
            "victim_transaction_id",
            str,
            False,
            "Transaction to abort as deadlock victim",
            {},
        ),
        (
            "enable_monitoring",
            bool,
            False,
            "Enable continuous deadlock monitoring",
            {"default": False},
        ),
        (
            "monitoring_interval",
            float,
            False,
            "Monitoring interval in seconds",
            {"default": 1.0},
        ),
        (
            "metadata",
            dict,
            False,
            "Additional metadata for the operation",
            {"default": {}},
        ),
    ]

    return {
        name: NodeParameter(
            name=name,
            type=kind,
            required=required,
            description=text,
            **extra,
        )
        for name, kind, required, text, extra in rows
    }
243
+
244
def get_output_schema(self) -> Dict[str, NodeParameter]:
    """Return the output fields of this node, keyed by name."""
    # (name, type, description) for each output field.
    fields_spec = (
        ("deadlocks_detected", list, "List of detected deadlocks"),
        ("deadlock_count", int, "Number of deadlocks detected"),
        ("active_locks", int, "Number of active locks"),
        ("active_waits", int, "Number of active waits"),
        ("resolution_actions", list, "Recommended or taken resolution actions"),
        ("wait_for_graph", dict, "Current wait-for graph structure"),
        ("monitoring_status", str, "Current monitoring status"),
        ("timestamp", str, "ISO timestamp of operation"),
        ("status", str, "Operation status"),
    )

    return {
        key: NodeParameter(name=key, type=kind, description=text)
        for key, kind, text in fields_spec
    }
285
+
286
async def async_run(self, **kwargs) -> Dict[str, Any]:
    """Execute the deadlock detection operation named by ``operation``.

    Supported operations: ``initialize``, ``register_lock`` (alias
    ``acquire_resource``), ``request_resource``, ``register_wait``,
    ``release_lock`` (alias ``release_resource``), ``detect_deadlocks``,
    ``resolve_deadlock``, ``get_status``, ``start_monitoring`` and
    ``stop_monitoring``. All keyword arguments are passed through to the
    selected handler.

    Returns:
        The result dictionary produced by the handler.

    Raises:
        NodeExecutionError: If the operation is unknown or the handler
            fails. The original exception is attached as ``__cause__``.
    """
    # Operation name -> handler method name. Aliases share one handler.
    handlers = {
        "initialize": "_initialize",
        "register_lock": "_register_lock",
        "acquire_resource": "_register_lock",
        "request_resource": "_request_resource",
        "register_wait": "_register_wait",
        "release_lock": "_release_lock",
        "release_resource": "_release_lock",
        "detect_deadlocks": "_detect_deadlocks",
        "resolve_deadlock": "_resolve_deadlock",
        "get_status": "_get_status",
        "start_monitoring": "_start_monitoring",
        "stop_monitoring": "_stop_monitoring",
    }
    operation = kwargs.get("operation")

    try:
        # Non-string values can never name an operation; checking first also
        # avoids a TypeError from hashing an unhashable value.
        handler_name = (
            handlers.get(operation) if isinstance(operation, str) else None
        )
        if handler_name is None:
            raise ValueError(f"Unknown operation: {operation}")

        # Resolve the handler lazily so only the requested one is looked up.
        return await getattr(self, handler_name)(**kwargs)

    except Exception as e:
        self.logger.error(f"Deadlock detection operation failed: {str(e)}")
        # Chain explicitly so the root cause stays visible in tracebacks.
        raise NodeExecutionError(
            f"Failed to execute deadlock detection: {str(e)}"
        ) from e
321
+
322
+ async def _register_lock(self, **kwargs) -> Dict[str, Any]:
323
+ """Register a new resource lock."""
324
+ transaction_id = kwargs.get("transaction_id")
325
+ resource_id = kwargs.get("resource_id")
326
+ lock_type = kwargs.get("lock_type", "exclusive")
327
+ timeout = kwargs.get("timeout")
328
+ metadata = kwargs.get("metadata", {})
329
+
330
+ if not transaction_id or not resource_id:
331
+ raise ValueError("transaction_id and resource_id are required")
332
+
333
+ current_time = time.time()
334
+ lock_id = f"{transaction_id}:{resource_id}"
335
+
336
+ # Create lock record
337
+ lock = ResourceLock(
338
+ resource_id=resource_id,
339
+ lock_type=lock_type,
340
+ holder_transaction_id=transaction_id,
341
+ requested_at=current_time,
342
+ granted_at=current_time,
343
+ timeout=timeout,
344
+ metadata=metadata,
345
+ )
346
+
347
+ # Register lock
348
+ self._active_locks[lock_id] = lock
349
+ self._transaction_resources[transaction_id].add(resource_id)
350
+ self._resource_holders[resource_id] = transaction_id
351
+
352
+ self.logger.debug(
353
+ f"Registered lock: {transaction_id} -> {resource_id} ({lock_type})"
354
+ )
355
+
356
+ return {
357
+ "deadlocks_detected": [],
358
+ "deadlock_count": 0,
359
+ "active_locks": len(self._active_locks),
360
+ "active_waits": len(self._active_waits),
361
+ "resolution_actions": [],
362
+ "wait_for_graph": {k: list(v) for k, v in self._wait_for_graph.items()},
363
+ "monitoring_status": "monitoring" if self._monitoring_active else "idle",
364
+ "timestamp": datetime.now(UTC).isoformat(),
365
+ "status": "success",
366
+ }
367
+
368
+ async def _register_wait(self, **kwargs) -> Dict[str, Any]:
369
+ """Register a transaction wait."""
370
+ transaction_id = kwargs.get("transaction_id")
371
+ waiting_for_transaction_id = kwargs.get("waiting_for_transaction_id")
372
+ resource_id = kwargs.get("resource_id")
373
+ timeout = kwargs.get("timeout")
374
+ priority = kwargs.get("priority", 0)
375
+ cost = kwargs.get("cost", 0.0)
376
+ metadata = kwargs.get("metadata", {})
377
+
378
+ if not transaction_id or not waiting_for_transaction_id:
379
+ raise ValueError(
380
+ "transaction_id and waiting_for_transaction_id are required"
381
+ )
382
+
383
+ current_time = time.time()
384
+ wait_id = f"{transaction_id}:{waiting_for_transaction_id}"
385
+
386
+ # Create wait record
387
+ wait = TransactionWait(
388
+ transaction_id=transaction_id,
389
+ waiting_for_transaction_id=waiting_for_transaction_id,
390
+ resource_id=resource_id or "unknown",
391
+ wait_start_time=current_time,
392
+ timeout=timeout,
393
+ priority=priority,
394
+ cost=cost,
395
+ metadata=metadata,
396
+ )
397
+
398
+ # Register wait and update wait-for graph
399
+ self._active_waits[wait_id] = wait
400
+ self._wait_for_graph[transaction_id].add(waiting_for_transaction_id)
401
+
402
+ self.logger.debug(
403
+ f"Registered wait: {transaction_id} -> {waiting_for_transaction_id}"
404
+ )
405
+
406
+ # Check for immediate deadlock
407
+ deadlocks = await self._detect_cycles_in_wait_graph()
408
+
409
+ return {
410
+ "deadlocks_detected": [self._serialize_deadlock(d) for d in deadlocks],
411
+ "deadlock_count": len(deadlocks),
412
+ "active_locks": len(self._active_locks),
413
+ "active_waits": len(self._active_waits),
414
+ "resolution_actions": [],
415
+ "wait_for_graph": {k: list(v) for k, v in self._wait_for_graph.items()},
416
+ "monitoring_status": "monitoring" if self._monitoring_active else "idle",
417
+ "timestamp": datetime.now(UTC).isoformat(),
418
+ "status": "success",
419
+ }
420
+
421
+ async def _release_lock(self, **kwargs) -> Dict[str, Any]:
422
+ """Release a resource lock."""
423
+ transaction_id = kwargs.get("transaction_id")
424
+ resource_id = kwargs.get("resource_id")
425
+
426
+ if not transaction_id:
427
+ raise ValueError("transaction_id is required")
428
+
429
+ # Remove specific lock or all locks for transaction
430
+ if resource_id:
431
+ lock_id = f"{transaction_id}:{resource_id}"
432
+ if lock_id in self._active_locks:
433
+ del self._active_locks[lock_id]
434
+ self._transaction_resources[transaction_id].discard(resource_id)
435
+ if self._resource_holders.get(resource_id) == transaction_id:
436
+ del self._resource_holders[resource_id]
437
+ else:
438
+ # Remove all locks for transaction
439
+ to_remove = [
440
+ lock_id
441
+ for lock_id in self._active_locks
442
+ if self._active_locks[lock_id].holder_transaction_id == transaction_id
443
+ ]
444
+ for lock_id in to_remove:
445
+ lock = self._active_locks[lock_id]
446
+ del self._active_locks[lock_id]
447
+ self._transaction_resources[transaction_id].discard(lock.resource_id)
448
+ if self._resource_holders.get(lock.resource_id) == transaction_id:
449
+ del self._resource_holders[lock.resource_id]
450
+
451
+ # Remove waits involving this transaction
452
+ to_remove_waits = [
453
+ wait_id
454
+ for wait_id in self._active_waits
455
+ if (
456
+ self._active_waits[wait_id].transaction_id == transaction_id
457
+ or self._active_waits[wait_id].waiting_for_transaction_id
458
+ == transaction_id
459
+ )
460
+ ]
461
+ for wait_id in to_remove_waits:
462
+ wait = self._active_waits[wait_id]
463
+ del self._active_waits[wait_id]
464
+ self._wait_for_graph[wait.transaction_id].discard(
465
+ wait.waiting_for_transaction_id
466
+ )
467
+
468
+ # Clean up empty graph entries
469
+ if (
470
+ transaction_id in self._wait_for_graph
471
+ and not self._wait_for_graph[transaction_id]
472
+ ):
473
+ del self._wait_for_graph[transaction_id]
474
+
475
+ self.logger.debug(f"Released locks for transaction: {transaction_id}")
476
+
477
+ return {
478
+ "deadlocks_detected": [],
479
+ "deadlock_count": 0,
480
+ "active_locks": len(self._active_locks),
481
+ "active_waits": len(self._active_waits),
482
+ "resolution_actions": [f"Released locks for {transaction_id}"],
483
+ "wait_for_graph": {k: list(v) for k, v in self._wait_for_graph.items()},
484
+ "monitoring_status": "monitoring" if self._monitoring_active else "idle",
485
+ "timestamp": datetime.now(UTC).isoformat(),
486
+ "status": "success",
487
+ }
488
+
489
+ async def _detect_deadlocks(self, **kwargs) -> Dict[str, Any]:
490
+ """Detect deadlocks using specified algorithm."""
491
+ algorithm = kwargs.get("detection_algorithm", "wait_for_graph")
492
+
493
+ deadlocks = []
494
+
495
+ if algorithm in ["wait_for_graph", "combined"]:
496
+ cycle_deadlocks = await self._detect_cycles_in_wait_graph()
497
+ deadlocks.extend(cycle_deadlocks)
498
+
499
+ if algorithm in ["timeout_based", "combined"]:
500
+ timeout_deadlocks = await self._detect_timeout_deadlocks()
501
+ deadlocks.extend(timeout_deadlocks)
502
+
503
+ # Store detected deadlocks
504
+ self._detected_deadlocks.extend(deadlocks)
505
+
506
+ # Add to detection history
507
+ self._detection_history.append(
508
+ {
509
+ "timestamp": time.time(),
510
+ "algorithm": algorithm,
511
+ "deadlocks_found": len(deadlocks),
512
+ "deadlock_ids": [d.detection_id for d in deadlocks],
513
+ }
514
+ )
515
+
516
+ # Generate resolution recommendations
517
+ resolution_actions = []
518
+ for deadlock in deadlocks:
519
+ deadlock.victim_candidates = self._select_victim_candidates(deadlock)
520
+ deadlock.recommended_strategy = self._recommend_resolution_strategy(
521
+ deadlock
522
+ )
523
+ resolution_actions.append(
524
+ {
525
+ "deadlock_id": deadlock.detection_id,
526
+ "recommended_strategy": deadlock.recommended_strategy.value,
527
+ "victim_candidates": deadlock.victim_candidates,
528
+ }
529
+ )
530
+
531
+ self.logger.info(
532
+ f"Detected {len(deadlocks)} deadlocks using {algorithm} algorithm"
533
+ )
534
+
535
+ return {
536
+ "deadlocks_detected": [self._serialize_deadlock(d) for d in deadlocks],
537
+ "deadlock_count": len(deadlocks),
538
+ "active_locks": len(self._active_locks),
539
+ "active_waits": len(self._active_waits),
540
+ "resolution_actions": resolution_actions,
541
+ "wait_for_graph": {k: list(v) for k, v in self._wait_for_graph.items()},
542
+ "monitoring_status": "monitoring" if self._monitoring_active else "idle",
543
+ "timestamp": datetime.now(UTC).isoformat(),
544
+ "status": "success",
545
+ }
546
+
547
async def _detect_cycles_in_wait_graph(self) -> List[DeadlockDetection]:
    """Detect cycles in the wait-for graph using depth-first search.

    Every edge that leads back to a transaction on the current DFS path
    closes a cycle. Each such cycle is reported as a list that starts and
    ends with the same transaction id and is turned into a deadlock
    record via ``_create_deadlock_from_cycle``.

    The DFS path is always unwound after a transaction has been explored,
    so state from one search root never leaks into the next. A transaction
    that merely waits on a member of an existing cycle is therefore not
    reported, and no longer makes the search fail.

    Returns:
        One deadlock record per cycle found, in discovery order.
    """
    graph = self._wait_for_graph
    visited: Set[str] = set()
    on_path: Set[str] = set()  # transactions on the current DFS path
    path: List[str] = []  # the same path, in visiting order
    cycles: List[List[str]] = []

    def visit(node: str) -> None:
        """Explore ``node`` and record every cycle closed from it."""
        visited.add(node)
        on_path.add(node)
        path.append(node)

        # .get() keeps the defaultdict from growing; the copy guards
        # against the set changing while it is being iterated.
        for neighbor in list(graph.get(node, ())):
            if neighbor in on_path:
                # Back edge: the cycle is the path from neighbor to here.
                start = path.index(neighbor)
                cycles.append(path[start:] + [neighbor])
            elif neighbor not in visited:
                visit(neighbor)

        # Always unwind, whether or not a cycle was found below this node.
        on_path.discard(node)
        path.pop()

    for transaction_id in list(graph):
        if transaction_id not in visited:
            visit(transaction_id)

    return [self._create_deadlock_from_cycle(cycle) for cycle in cycles]
586
+
587
async def _detect_timeout_deadlocks(self) -> List[DeadlockDetection]:
    """Infer a deadlock from waits that have exceeded their timeout.

    All expired waits are grouped into a single ``TIMEOUT_INFERRED``
    deadlock. Waits with no timeout (or a falsy timeout) are ignored.

    Returns:
        A one-element list when at least one wait has expired, otherwise
        an empty list.
    """
    now = time.time()

    expired = [
        pending
        for pending in self._active_waits.values()
        if pending.timeout and (now - pending.wait_start_time) > pending.timeout
    ]
    if not expired:
        return []

    longest = max(now - pending.wait_start_time for pending in expired)
    inferred = DeadlockDetection(
        detection_id=f"timeout_deadlock_{int(now)}",
        deadlock_type=DeadlockType.TIMEOUT_INFERRED,
        involved_transactions=list({pending.transaction_id for pending in expired}),
        involved_resources=list({pending.resource_id for pending in expired}),
        detection_time=now,
        wait_chain=expired,
        metadata={
            "timeout_count": len(expired),
            "max_wait_time": longest,
        },
    )
    return [inferred]
622
+
623
def _create_deadlock_from_cycle(self, cycle: List[str]) -> DeadlockDetection:
    """Create a deadlock detection from a detected cycle.

    Args:
        cycle: Transaction ids along the cycle, with the first id repeated
            as the last element (for example ``["A", "B", "A"]``).

    Returns:
        A ``WAIT_FOR_GRAPH`` deadlock whose wait chain holds the registered
        waits found for each consecutive pair in the cycle.
    """
    # Local import: the module's import block does not provide uuid.
    import uuid

    current_time = time.time()
    # Whole seconds plus cycle length are not unique: two cycles of equal
    # length found in the same second would share an id and only the first
    # could ever be resolved. A random suffix keeps ids distinct.
    detection_id = (
        f"cycle_deadlock_{int(current_time)}_{len(cycle)}_{uuid.uuid4().hex[:8]}"
    )

    # Walk consecutive (waiter, holder) pairs and collect registered waits.
    wait_chain = []
    for waiter, holder in zip(cycle, cycle[1:]):
        wait = self._active_waits.get(f"{waiter}:{holder}")
        if wait is not None:
            wait_chain.append(wait)

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # resource list is deterministic.
    involved_resources = list(dict.fromkeys(w.resource_id for w in wait_chain))

    return DeadlockDetection(
        detection_id=detection_id,
        deadlock_type=DeadlockType.WAIT_FOR_GRAPH,
        involved_transactions=cycle[:-1],  # Remove duplicate last element
        involved_resources=involved_resources,
        detection_time=current_time,
        wait_chain=wait_chain,
        metadata={"cycle_length": len(cycle) - 1, "cycle_path": " -> ".join(cycle)},
    )
652
+
653
def _select_victim_candidates(self, deadlock: DeadlockDetection) -> List[str]:
    """Select victim candidates for deadlock resolution.

    The candidates are the transactions whose waits carry the lowest
    priority in the wait chain (lower priority = better victim). They are
    returned best-first: lowest cost first, and among equal costs the
    transaction that started waiting most recently (shortest wait) first.
    Duplicates are removed.

    When the wait chain is empty, the first involved transaction is the
    only candidate. If there are no involved transactions either, the
    result is empty.

    The order is deterministic, so taking the first candidate as the
    default victim gives the same result on every run.
    """
    waits = deadlock.wait_chain
    if not waits:
        # Fallback: first transaction in the list (or nothing at all).
        return list(deadlock.involved_transactions[:1])

    lowest_priority = min(w.priority for w in waits)
    ranked = sorted(
        (w for w in waits if w.priority == lowest_priority),
        # Cheapest first; ties go to the latest wait start.
        key=lambda w: (w.cost, -w.wait_start_time),
    )

    # dict.fromkeys removes duplicates while preserving the ranking.
    return list(dict.fromkeys(w.transaction_id for w in ranked))
684
+
685
def _recommend_resolution_strategy(
    self, deadlock: DeadlockDetection
) -> ResolutionStrategy:
    """Pick the resolution strategy suggested by the deadlock's data.

    Timeout-inferred deadlocks always get ``TIMEOUT_ROLLBACK``. Otherwise
    the wait chain decides: any non-zero priority selects
    ``PRIORITY_BASED``, else any non-zero cost selects ``COST_BASED``,
    else plain ``VICTIM_SELECTION``.
    """
    if deadlock.deadlock_type == DeadlockType.TIMEOUT_INFERRED:
        return ResolutionStrategy.TIMEOUT_ROLLBACK

    # An empty or missing chain carries no priority or cost information.
    chain = deadlock.wait_chain or ()

    if any(entry.priority != 0 for entry in chain):
        return ResolutionStrategy.PRIORITY_BASED

    if any(entry.cost != 0.0 for entry in chain):
        return ResolutionStrategy.COST_BASED

    return ResolutionStrategy.VICTIM_SELECTION
704
+
705
+ async def _resolve_deadlock(self, **kwargs) -> Dict[str, Any]:
706
+ """Resolve a detected deadlock."""
707
+ deadlock_id = kwargs.get("deadlock_id")
708
+ victim_transaction_id = kwargs.get("victim_transaction_id")
709
+ strategy = kwargs.get("resolution_strategy", "victim_selection")
710
+
711
+ if not deadlock_id:
712
+ raise ValueError("deadlock_id is required")
713
+
714
+ # Find the deadlock
715
+ deadlock = next(
716
+ (d for d in self._detected_deadlocks if d.detection_id == deadlock_id), None
717
+ )
718
+ if not deadlock:
719
+ raise ValueError(f"Deadlock {deadlock_id} not found")
720
+
721
+ resolution_actions = []
722
+
723
+ # Determine victim if not specified
724
+ if not victim_transaction_id:
725
+ if deadlock.victim_candidates:
726
+ victim_transaction_id = deadlock.victim_candidates[0]
727
+ else:
728
+ victim_transaction_id = deadlock.involved_transactions[0]
729
+
730
+ # Execute resolution strategy
731
+ if strategy in ["victim_selection", "priority_based", "cost_based"]:
732
+ # Abort victim transaction
733
+ await self._release_lock(transaction_id=victim_transaction_id)
734
+ resolution_actions.append(
735
+ {
736
+ "action": "abort_transaction",
737
+ "transaction_id": victim_transaction_id,
738
+ "reason": f"Deadlock victim selected using {strategy} strategy",
739
+ }
740
+ )
741
+
742
+ elif strategy == "timeout_rollback":
743
+ # Rollback all transactions involved in timeout deadlock
744
+ for txn_id in deadlock.involved_transactions:
745
+ await self._release_lock(transaction_id=txn_id)
746
+ resolution_actions.append(
747
+ {
748
+ "action": "timeout_rollback",
749
+ "transaction_id": txn_id,
750
+ "reason": "Timeout-based deadlock resolution",
751
+ }
752
+ )
753
+
754
+ # Mark deadlock as resolved
755
+ deadlock.metadata["resolved"] = True
756
+ deadlock.metadata["resolution_time"] = time.time()
757
+ deadlock.metadata["resolution_strategy"] = strategy
758
+ deadlock.metadata["victim_transaction"] = victim_transaction_id
759
+
760
+ self.logger.info(
761
+ f"Resolved deadlock {deadlock_id} using {strategy} strategy, victim: {victim_transaction_id}"
762
+ )
763
+
764
+ return {
765
+ "deadlocks_detected": [],
766
+ "deadlock_count": 0,
767
+ "active_locks": len(self._active_locks),
768
+ "active_waits": len(self._active_waits),
769
+ "resolution_actions": resolution_actions,
770
+ "wait_for_graph": {k: list(v) for k, v in self._wait_for_graph.items()},
771
+ "monitoring_status": "monitoring" if self._monitoring_active else "idle",
772
+ "timestamp": datetime.now(UTC).isoformat(),
773
+ "status": "success",
774
+ }
775
+
776
+ async def _request_resource(self, **kwargs) -> Dict[str, Any]:
777
+ """Request a resource - simplified version for E2E testing."""
778
+ transaction_id = kwargs.get("transaction_id")
779
+ resource_id = kwargs.get("resource_id")
780
+ resource_type = kwargs.get("resource_type", "database_table")
781
+ lock_type = kwargs.get("lock_type", "SHARED")
782
+
783
+ if not transaction_id or not resource_id:
784
+ raise ValueError("transaction_id and resource_id are required")
785
+
786
+ # For E2E testing, just track the request
787
+ current_time = time.time()
788
+
789
+ # Return status for tracking
790
+ return {
791
+ "deadlocks_detected": [
792
+ self._serialize_deadlock(d) for d in self._detected_deadlocks
793
+ ],
794
+ "deadlock_count": len(self._detected_deadlocks),
795
+ "active_locks": len(self._active_locks),
796
+ "active_waits": len(self._active_waits),
797
+ "resolution_actions": [],
798
+ "wait_for_graph": {k: list(v) for k, v in self._wait_for_graph.items()},
799
+ "monitoring_status": f"requested_{resource_type}_{lock_type}".lower(),
800
+ "timestamp": datetime.now(UTC).isoformat(),
801
+ "status": "success",
802
+ }
803
+
804
+ async def _initialize(self, **kwargs) -> Dict[str, Any]:
805
+ """Initialize the deadlock detector."""
806
+ # Reset internal state
807
+ self._active_locks.clear()
808
+ self._active_waits.clear()
809
+ self._detected_deadlocks.clear()
810
+ self._monitoring_active = False
811
+
812
+ # Initialize with provided configuration
813
+ if "deadlock_timeout" in kwargs:
814
+ self._deadlock_timeout = kwargs["deadlock_timeout"]
815
+ if "cycle_detection_enabled" in kwargs:
816
+ self._cycle_detection_enabled = kwargs["cycle_detection_enabled"]
817
+ if "timeout_detection_enabled" in kwargs:
818
+ self._timeout_detection_enabled = kwargs["timeout_detection_enabled"]
819
+
820
+ return {
821
+ "deadlocks_detected": [
822
+ self._serialize_deadlock(d) for d in self._detected_deadlocks
823
+ ],
824
+ "deadlock_count": len(self._detected_deadlocks),
825
+ "active_locks": len(self._active_locks),
826
+ "active_waits": len(self._active_waits),
827
+ "resolution_actions": [],
828
+ "wait_for_graph": {k: list(v) for k, v in self._wait_for_graph.items()},
829
+ "monitoring_status": "initialized",
830
+ "timestamp": datetime.now(UTC).isoformat(),
831
+ "status": "success",
832
+ }
833
+
834
+ async def _get_status(self, **kwargs) -> Dict[str, Any]:
835
+ """Get current deadlock detector status."""
836
+ return {
837
+ "deadlocks_detected": [
838
+ self._serialize_deadlock(d) for d in self._detected_deadlocks
839
+ ],
840
+ "deadlock_count": len(self._detected_deadlocks),
841
+ "active_locks": len(self._active_locks),
842
+ "active_waits": len(self._active_waits),
843
+ "resolution_actions": [],
844
+ "wait_for_graph": {k: list(v) for k, v in self._wait_for_graph.items()},
845
+ "monitoring_status": "monitoring" if self._monitoring_active else "idle",
846
+ "timestamp": datetime.now(UTC).isoformat(),
847
+ "status": "success",
848
+ }
849
+
850
+ async def _start_monitoring(self, **kwargs) -> Dict[str, Any]:
851
+ """Start continuous deadlock monitoring."""
852
+ interval = kwargs.get("monitoring_interval", 1.0)
853
+
854
+ if not self._monitoring_active:
855
+ self._monitoring_active = True
856
+ monitoring_task = asyncio.create_task(self._monitoring_loop(interval))
857
+ self._background_tasks.add(monitoring_task)
858
+ monitoring_task.add_done_callback(self._background_tasks.discard)
859
+
860
+ return {
861
+ "deadlocks_detected": [],
862
+ "deadlock_count": 0,
863
+ "active_locks": len(self._active_locks),
864
+ "active_waits": len(self._active_waits),
865
+ "resolution_actions": [],
866
+ "wait_for_graph": {k: list(v) for k, v in self._wait_for_graph.items()},
867
+ "monitoring_status": "monitoring",
868
+ "timestamp": datetime.now(UTC).isoformat(),
869
+ "status": "success",
870
+ }
871
+
872
+ async def _stop_monitoring(self, **kwargs) -> Dict[str, Any]:
873
+ """Stop continuous deadlock monitoring."""
874
+ self._monitoring_active = False
875
+
876
+ # Cancel background tasks
877
+ for task in self._background_tasks:
878
+ if not task.done():
879
+ task.cancel()
880
+
881
+ # Wait for tasks to complete
882
+ if self._background_tasks:
883
+ await asyncio.gather(*self._background_tasks, return_exceptions=True)
884
+
885
+ self._background_tasks.clear()
886
+
887
+ return {
888
+ "deadlocks_detected": [],
889
+ "deadlock_count": 0,
890
+ "active_locks": len(self._active_locks),
891
+ "active_waits": len(self._active_waits),
892
+ "resolution_actions": [],
893
+ "wait_for_graph": {k: list(v) for k, v in self._wait_for_graph.items()},
894
+ "monitoring_status": "stopped",
895
+ "timestamp": datetime.now(UTC).isoformat(),
896
+ "status": "success",
897
+ }
898
+
899
+ async def _monitoring_loop(self, interval: float):
900
+ """Background monitoring loop for continuous deadlock detection."""
901
+ while self._monitoring_active:
902
+ try:
903
+ await asyncio.sleep(interval)
904
+
905
+ # Detect deadlocks
906
+ deadlocks = await self._detect_cycles_in_wait_graph()
907
+ timeout_deadlocks = await self._detect_timeout_deadlocks()
908
+
909
+ all_deadlocks = deadlocks + timeout_deadlocks
910
+
911
+ if all_deadlocks:
912
+ self.logger.warning(
913
+ f"Monitoring detected {len(all_deadlocks)} deadlocks"
914
+ )
915
+
916
+ # Store detected deadlocks
917
+ self._detected_deadlocks.extend(all_deadlocks)
918
+
919
+ # TODO: Send alerts or take automatic resolution actions
920
+
921
+ except asyncio.CancelledError:
922
+ break
923
+ except Exception as e:
924
+ self.logger.error(f"Monitoring loop error: {e}")
925
+
926
+ def _serialize_deadlock(self, deadlock: DeadlockDetection) -> Dict[str, Any]:
927
+ """Serialize a deadlock detection to dictionary."""
928
+ return {
929
+ "detection_id": deadlock.detection_id,
930
+ "deadlock_type": deadlock.deadlock_type.value,
931
+ "involved_transactions": deadlock.involved_transactions,
932
+ "involved_resources": deadlock.involved_resources,
933
+ "detection_time": deadlock.detection_time,
934
+ "wait_chain": [
935
+ {
936
+ "transaction_id": w.transaction_id,
937
+ "waiting_for_transaction_id": w.waiting_for_transaction_id,
938
+ "resource_id": w.resource_id,
939
+ "wait_start_time": w.wait_start_time,
940
+ "timeout": w.timeout,
941
+ "priority": w.priority,
942
+ "cost": w.cost,
943
+ }
944
+ for w in deadlock.wait_chain
945
+ ],
946
+ "victim_candidates": deadlock.victim_candidates,
947
+ "recommended_strategy": (
948
+ deadlock.recommended_strategy.value
949
+ if deadlock.recommended_strategy
950
+ else None
951
+ ),
952
+ "metadata": deadlock.metadata,
953
+ }
954
+
955
def run(self, **kwargs) -> Dict[str, Any]:
    """Synchronous wrapper around ``async_run`` for compatibility.

    ``asyncio.run`` refuses to start when the calling thread already has a
    running event loop. In that situation the coroutine is executed on its
    own event loop in a short-lived worker thread, and this call blocks
    until the result is available.

    Args:
        **kwargs: Forwarded unchanged to ``async_run``.

    Returns:
        Whatever ``async_run`` returns.
    """
    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    coro = self.async_run(**kwargs)

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: the plain path is safe.
        return asyncio.run(coro)

    # A loop is already running here, so run on a private loop in a
    # worker thread instead of raising RuntimeError.
    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()
960
+
961
async def cleanup(self):
    """Stop monitoring, then run the parent class's cleanup if it has one."""
    await self._stop_monitoring()

    # The parent may not define cleanup(), so check before delegating.
    parent = super()
    if hasattr(parent, "cleanup"):
        await parent.cleanup()