kailash 0.6.6__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. kailash/__init__.py +35 -5
  2. kailash/adapters/__init__.py +5 -0
  3. kailash/adapters/mcp_platform_adapter.py +273 -0
  4. kailash/channels/__init__.py +21 -0
  5. kailash/channels/api_channel.py +409 -0
  6. kailash/channels/base.py +271 -0
  7. kailash/channels/cli_channel.py +661 -0
  8. kailash/channels/event_router.py +496 -0
  9. kailash/channels/mcp_channel.py +648 -0
  10. kailash/channels/session.py +423 -0
  11. kailash/mcp_server/discovery.py +1 -1
  12. kailash/middleware/mcp/enhanced_server.py +22 -16
  13. kailash/nexus/__init__.py +21 -0
  14. kailash/nexus/factory.py +413 -0
  15. kailash/nexus/gateway.py +545 -0
  16. kailash/nodes/__init__.py +2 -0
  17. kailash/nodes/ai/iterative_llm_agent.py +988 -17
  18. kailash/nodes/ai/llm_agent.py +29 -9
  19. kailash/nodes/api/__init__.py +2 -2
  20. kailash/nodes/api/monitoring.py +1 -1
  21. kailash/nodes/base_async.py +54 -14
  22. kailash/nodes/code/async_python.py +1 -1
  23. kailash/nodes/data/bulk_operations.py +939 -0
  24. kailash/nodes/data/query_builder.py +373 -0
  25. kailash/nodes/data/query_cache.py +512 -0
  26. kailash/nodes/monitoring/__init__.py +10 -0
  27. kailash/nodes/monitoring/deadlock_detector.py +964 -0
  28. kailash/nodes/monitoring/performance_anomaly.py +1078 -0
  29. kailash/nodes/monitoring/race_condition_detector.py +1151 -0
  30. kailash/nodes/monitoring/transaction_metrics.py +790 -0
  31. kailash/nodes/monitoring/transaction_monitor.py +931 -0
  32. kailash/nodes/system/__init__.py +17 -0
  33. kailash/nodes/system/command_parser.py +820 -0
  34. kailash/nodes/transaction/__init__.py +48 -0
  35. kailash/nodes/transaction/distributed_transaction_manager.py +983 -0
  36. kailash/nodes/transaction/saga_coordinator.py +652 -0
  37. kailash/nodes/transaction/saga_state_storage.py +411 -0
  38. kailash/nodes/transaction/saga_step.py +467 -0
  39. kailash/nodes/transaction/transaction_context.py +756 -0
  40. kailash/nodes/transaction/two_phase_commit.py +978 -0
  41. kailash/nodes/transform/processors.py +17 -1
  42. kailash/nodes/validation/__init__.py +21 -0
  43. kailash/nodes/validation/test_executor.py +532 -0
  44. kailash/nodes/validation/validation_nodes.py +447 -0
  45. kailash/resources/factory.py +1 -1
  46. kailash/runtime/async_local.py +84 -21
  47. kailash/runtime/local.py +21 -2
  48. kailash/runtime/parameter_injector.py +187 -31
  49. kailash/security.py +16 -1
  50. kailash/servers/__init__.py +32 -0
  51. kailash/servers/durable_workflow_server.py +430 -0
  52. kailash/servers/enterprise_workflow_server.py +466 -0
  53. kailash/servers/gateway.py +183 -0
  54. kailash/servers/workflow_server.py +290 -0
  55. kailash/utils/data_validation.py +192 -0
  56. kailash/workflow/builder.py +291 -12
  57. kailash/workflow/validation.py +144 -8
  58. {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/METADATA +1 -1
  59. {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/RECORD +63 -25
  60. {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/WHEEL +0 -0
  61. {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/entry_points.txt +0 -0
  62. {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/licenses/LICENSE +0 -0
  63. {kailash-0.6.6.dist-info → kailash-0.7.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,983 @@
1
+ """Distributed Transaction Manager for orchestrating different transaction patterns.
2
+
3
+ This module provides a high-level manager that can automatically choose between
4
+ different distributed transaction patterns (Saga, Two-Phase Commit) based on
5
+ transaction requirements and participant capabilities.
6
+
7
+ The Distributed Transaction Manager:
8
+ 1. Analyzes transaction requirements and participant capabilities
9
+ 2. Selects the appropriate transaction pattern (Saga vs 2PC)
10
+ 3. Orchestrates the chosen pattern with optimal configuration
11
+ 4. Provides unified monitoring and recovery across all patterns
12
+ 5. Manages transaction lifecycle and resource cleanup
13
+
14
+ Examples:
15
+ Automatic pattern selection:
16
+
17
+ >>> manager = DistributedTransactionManager()
18
+ >>> result = manager.execute(
19
+ ... transaction_name="order_processing",
20
+ ... participants=[
21
+ ... {"id": "payment", "supports_2pc": True, "compensation": "refund"},
22
+ ... {"id": "inventory", "supports_2pc": True, "compensation": "release"},
23
+ ... {"id": "shipping", "supports_2pc": False, "compensation": "cancel"}
24
+ ... ],
25
+ ... requirements={"consistency": "strong", "availability": "high"}
26
+ ... )
27
+ # Automatically selects Saga due to shipping service not supporting 2PC
28
+
29
+ Explicit pattern selection:
30
+
31
+ >>> manager = DistributedTransactionManager()
32
+ >>> result = manager.execute(
33
+ ... transaction_name="financial_transfer",
34
+ ... pattern="two_phase_commit", # Force 2PC for strong consistency
35
+ ... participants=[...],
36
+ ... context={"amount": 10000.00, "currency": "USD"}
37
+ ... )
38
+
39
+ Enterprise configuration:
40
+
41
+ >>> manager = DistributedTransactionManager(
42
+ ... default_timeout=300,
43
+ ... retry_policy={"max_attempts": 3, "backoff": "exponential"},
44
+ ... monitoring_enabled=True,
45
+ ... audit_logging=True,
46
+ ... state_storage="database"
47
+ ... )
48
+ """
49
+
50
+ import asyncio
51
+ import logging
52
+ import time
53
+ import uuid
54
+ from datetime import UTC, datetime
55
+ from enum import Enum
56
+ from typing import Any, Dict, List, Optional, Union
57
+
58
+ from kailash.nodes.base import NodeMetadata, NodeParameter, register_node
59
+ from kailash.nodes.base_async import AsyncNode
60
+ from kailash.sdk_exceptions import NodeConfigurationError, NodeExecutionError
61
+
62
+ logger = logging.getLogger(__name__)
63
+
64
+
65
class TransactionPattern(Enum):
    """Distributed transaction patterns the manager can be asked to use."""

    SAGA = "saga"
    TWO_PHASE_COMMIT = "two_phase_commit"
    # Leave the choice to the manager, which selects based on requirements.
    AUTO = "auto"
71
+
72
+
73
class ConsistencyLevel(Enum):
    """How strongly consistent a transaction needs to be."""

    # Saga pattern is suitable.
    EVENTUAL = "eventual"
    # Two-phase commit is preferred.
    STRONG = "strong"
    # Two-phase commit is required.
    IMMEDIATE = "immediate"
79
+
80
+
81
class AvailabilityLevel(Enum):
    """How available a transaction needs to be."""

    # Saga pattern is preferred.
    HIGH = "high"
    # Either pattern is acceptable.
    MEDIUM = "medium"
    # Two-phase commit is acceptable.
    LOW = "low"
87
+
88
+
89
class TransactionStatus(Enum):
    """Status values a managed transaction can report as a whole."""

    PENDING = "pending"
    RUNNING = "running"
    COMMITTED = "committed"
    ABORTED = "aborted"
    COMPENSATED = "compensated"
    FAILED = "failed"
    RECOVERING = "recovering"
99
+
100
+
101
class ParticipantCapability:
    """Capabilities and call settings of one transaction participant."""

    def __init__(
        self,
        participant_id: str,
        endpoint: str,
        supports_2pc: bool = False,
        supports_saga: bool = True,
        compensation_action: Optional[str] = None,
        timeout: int = 30,
        retry_count: int = 3,
        priority: int = 1,
    ):
        """Record the participant's identity, capabilities and call settings.

        Args:
            participant_id: Identifier of the participant.
            endpoint: Endpoint associated with the participant.
            supports_2pc: Whether the participant supports two-phase commit.
            supports_saga: Whether the participant supports the saga pattern.
            compensation_action: Optional compensation action name.
            timeout: Per-participant timeout value.
            retry_count: Per-participant retry count.
            priority: Ordering key for the participant.
        """
        # Identity
        self.participant_id, self.endpoint = participant_id, endpoint
        # Pattern support
        self.supports_2pc, self.supports_saga = supports_2pc, supports_saga
        # Call behaviour
        self.compensation_action = compensation_action
        self.timeout, self.retry_count = timeout, retry_count
        self.priority = priority

    def to_dict(self) -> Dict[str, Any]:
        """Return the participant as a plain dictionary for serialization."""
        exported = (
            "participant_id",
            "endpoint",
            "supports_2pc",
            "supports_saga",
            "compensation_action",
            "timeout",
            "retry_count",
            "priority",
        )
        return {field: getattr(self, field) for field in exported}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ParticipantCapability":
        """Build a participant from a dictionary.

        ``participant_id`` and ``endpoint`` are mandatory (a missing key raises
        ``KeyError``); every other field falls back to the constructor default.
        """
        # Mandatory keys are looked up first so a missing one fails early.
        participant_id = data["participant_id"]
        endpoint = data["endpoint"]

        fallbacks = {
            "supports_2pc": False,
            "supports_saga": True,
            "compensation_action": None,
            "timeout": 30,
            "retry_count": 3,
            "priority": 1,
        }
        optional = {key: data.get(key, value) for key, value in fallbacks.items()}
        return cls(participant_id=participant_id, endpoint=endpoint, **optional)
150
+
151
+
152
class TransactionRequirements:
    """Requirements of a transaction, used as input to pattern selection."""

    def __init__(
        self,
        consistency: Union[ConsistencyLevel, str] = ConsistencyLevel.EVENTUAL,
        availability: Union[AvailabilityLevel, str] = AvailabilityLevel.HIGH,
        timeout: int = 300,
        isolation_level: str = "read_committed",
        durability: bool = True,
        allow_partial_failure: bool = True,
    ):
        """Store the requirements, coercing string levels to their enums.

        Args:
            consistency: Consistency level, as an enum member or its string value.
            availability: Availability level, as an enum member or its string value.
            timeout: Transaction timeout value.
            isolation_level: Isolation level name.
            durability: Durability flag.
            allow_partial_failure: Whether partial failure is allowed.

        Raises:
            ValueError: If a string level is not a valid enum value.
        """
        # Strings are looked up by value; enum members are stored as given.
        if isinstance(consistency, str):
            consistency = ConsistencyLevel(consistency)
        if isinstance(availability, str):
            availability = AvailabilityLevel(availability)

        self.consistency = consistency
        self.availability = availability
        self.timeout = timeout
        self.isolation_level = isolation_level
        self.durability = durability
        self.allow_partial_failure = allow_partial_failure
178
+
179
+
180
+ @register_node("DistributedTransactionManagerNode")
181
+ class DistributedTransactionManagerNode(AsyncNode):
182
+ """High-level distributed transaction manager.
183
+
184
+ This node provides a unified interface for distributed transactions,
185
+ automatically selecting the optimal pattern (Saga vs 2PC) based on
186
+ participant capabilities and transaction requirements.
187
+
188
+ Key Features:
189
+ - Automatic pattern selection based on requirements
190
+ - Unified interface for all transaction patterns
191
+ - Participant capability analysis
192
+ - Transaction lifecycle management
193
+ - Cross-pattern monitoring and recovery
194
+ - Enterprise-grade configuration options
195
+
196
+ Operations:
197
+ - create_transaction: Create new distributed transaction
198
+ - add_participant: Add participant with capabilities
199
+ - execute_transaction: Execute with optimal pattern
200
+ - get_status: Get unified transaction status
201
+ - abort_transaction: Abort active transaction
202
+ - recover_transaction: Recover failed transaction
203
+ - list_transactions: List managed transactions
204
+ """
205
+
206
def __init__(
    self,
    transaction_name: str = None,
    transaction_id: str = None,
    default_pattern: Union[TransactionPattern, str] = TransactionPattern.AUTO,
    default_timeout: int = 300,
    state_storage: str = "memory",
    storage_config: Dict[str, Any] = None,
    monitoring_enabled: bool = True,
    audit_logging: bool = False,
    retry_policy: Dict[str, Any] = None,
    **kwargs,
):
    """Set up a manager for one distributed transaction.

    Args:
        transaction_name: Human-readable name; a ``dtx_<epoch>`` name is
            generated when omitted or empty.
        transaction_id: Unique identifier; a UUID4 string is generated when
            omitted or empty.
        default_pattern: Pattern used when an operation does not name one,
            as an enum member or its string value.
        default_timeout: Timeout used when no requirements are set.
        state_storage: Storage backend for transaction state.
        storage_config: Configuration for the state storage.
        monitoring_enabled: Enable transaction monitoring.
        audit_logging: Enable audit logging.
        retry_policy: Retry configuration; a built-in policy is used when
            omitted or empty.
        **kwargs: Additional node configuration, forwarded to ``AsyncNode``.
    """
    # Base node initialisation comes first, with metadata describing this node.
    node_name = kwargs.get("name", "distributed_transaction_manager")
    super().__init__(
        metadata=NodeMetadata(
            name=node_name,
            description="High-level manager for distributed transactions with pattern selection",
            version="1.0.0",
            tags={"transaction", "distributed", "manager", "saga", "2pc"},
        ),
        **kwargs,
    )

    # Fill in generated / built-in values for anything the caller left out.
    if not transaction_name:
        transaction_name = f"dtx_{int(time.time())}"
    if not transaction_id:
        transaction_id = str(uuid.uuid4())
    if isinstance(default_pattern, str):
        default_pattern = TransactionPattern(default_pattern)
    if not retry_policy:
        retry_policy = {
            "max_attempts": 3,
            "backoff": "exponential",
            "base_delay": 1.0,
            "max_delay": 30.0,
        }

    # Configuration
    self.transaction_name = transaction_name
    self.transaction_id = transaction_id
    self.default_pattern = default_pattern
    self.default_timeout = default_timeout
    self.monitoring_enabled = monitoring_enabled
    self.audit_logging = audit_logging
    self.retry_policy = retry_policy

    # Mutable transaction state; timestamps stay None until the event occurs.
    self.status = TransactionStatus.PENDING
    self.selected_pattern: Optional[TransactionPattern] = None
    self.participants: List[ParticipantCapability] = []
    self.requirements: Optional[TransactionRequirements] = None
    self.context: Dict[str, Any] = {}
    self.created_at: Optional[datetime] = None
    self.started_at: Optional[datetime] = None
    self.completed_at: Optional[datetime] = None
    self.error_message: Optional[str] = None

    # Coordinators are created on demand when a pattern is executed/recovered.
    self._saga_coordinator = None
    self._2pc_coordinator = None
    self._active_coordinator = None

    # Persistence settings; the storage object itself is resolved later.
    self.state_storage = state_storage
    self.storage_config = storage_config or {}
    self._storage = None

    logger.info(
        f"Initialized Distributed Transaction Manager: {self.transaction_id}"
    )
288
+
289
def get_parameters(self) -> Dict[str, NodeParameter]:
    """Describe the optional inputs this node accepts, keyed by name."""
    # (name, type, description, extra NodeParameter keyword arguments)
    specs = (
        (
            "operation",
            str,
            "Transaction operation to execute",
            {"default": "create_transaction"},
        ),
        ("transaction_name", str, "Human-readable transaction name", {}),
        (
            "pattern",
            str,
            "Transaction pattern (saga, two_phase_commit, auto)",
            {},
        ),
        (
            "participants",
            list,
            "List of transaction participants with capabilities",
            {},
        ),
        (
            "requirements",
            dict,
            "Transaction requirements for pattern selection",
            {},
        ),
        ("context", dict, "Transaction context data", {}),
        ("transaction_id", str, "Transaction ID for recovery operations", {}),
    )
    # Every input is optional; only "operation" carries an explicit default.
    return {
        name: NodeParameter(
            name=name,
            type=kind,
            required=False,
            description=text,
            **extra,
        )
        for name, kind, text, extra in specs
    }
336
+
337
def get_outputs(self) -> Dict[str, NodeParameter]:
    """Describe the outputs this node produces, keyed by name."""
    # (name, type, required, description)
    specs = (
        ("status", str, True, "Operation status (success, failed, aborted)"),
        ("transaction_id", str, True, "Transaction identifier"),
        ("transaction_status", str, True, "Current transaction status"),
        ("selected_pattern", str, False, "Selected transaction pattern"),
        ("participants", list, False, "List of transaction participants"),
        ("result", dict, False, "Transaction result data"),
        ("error", str, False, "Error message if transaction failed"),
    )
    return {
        name: NodeParameter(
            name=name,
            type=kind,
            required=mandatory,
            description=text,
        )
        for name, kind, mandatory, text in specs
    }
383
+
384
+ async def async_run(self, **kwargs) -> Dict[str, Any]:
385
+ """Execute transaction manager operation asynchronously."""
386
+ operation = kwargs.get("operation", "create_transaction")
387
+
388
+ try:
389
+ if operation == "create_transaction":
390
+ return await self._create_transaction(**kwargs)
391
+ elif operation == "add_participant":
392
+ return await self._add_participant(**kwargs)
393
+ elif operation == "execute_transaction":
394
+ return await self._execute_transaction(**kwargs)
395
+ elif operation == "get_status":
396
+ return await self._get_status()
397
+ elif operation == "abort_transaction":
398
+ return await self._abort_transaction()
399
+ elif operation == "recover_transaction":
400
+ return await self._recover_transaction(**kwargs)
401
+ elif operation == "list_transactions":
402
+ return await self._list_transactions(**kwargs)
403
+ else:
404
+ raise NodeExecutionError(
405
+ f"Unknown transaction manager operation: {operation}"
406
+ )
407
+
408
+ except Exception as e:
409
+ logger.error(f"Distributed transaction manager error: {e}")
410
+ self.error_message = str(e)
411
+ await self._persist_state()
412
+ return {
413
+ "status": "error",
414
+ "transaction_id": self.transaction_id,
415
+ "transaction_status": self.status.value,
416
+ "error": str(e),
417
+ }
418
+
419
async def _create_transaction(self, **kwargs) -> Dict[str, Any]:
    """Create a new distributed transaction.

    Optional keyword arguments:
        transaction_name: Replaces the current name when non-empty.
        requirements: Mapping of ``TransactionRequirements`` constructor
            arguments, or a ready-made ``TransactionRequirements``. ``None``
            or a missing key yields the default requirements.
        context: Mapping merged into the transaction context. ``None`` or a
            missing key leaves the context unchanged.

    Returns:
        A dictionary with ``status``, ``transaction_id``,
        ``transaction_status``, ``transaction_name`` and ``created_at``.

    Raises:
        NodeExecutionError: If the transaction is no longer ``PENDING``.
    """
    if self.status != TransactionStatus.PENDING:
        raise NodeExecutionError(
            f"Transaction already in status: {self.status.value}"
        )

    # Set transaction name if provided
    transaction_name = kwargs.get("transaction_name")
    if transaction_name:
        self.transaction_name = transaction_name

    # An explicit requirements=None must behave like an omitted key rather
    # than crash on ``**None``.
    requirements_data = kwargs.get("requirements") or {}
    if isinstance(requirements_data, TransactionRequirements):
        self.requirements = requirements_data
    else:
        self.requirements = TransactionRequirements(**requirements_data)

    # Same for context: ``dict.update(None)`` raises TypeError.
    context = kwargs.get("context") or {}
    self.context.update(context)

    # Set creation time
    self.created_at = datetime.now(UTC)

    logger.info(f"Creating distributed transaction: {self.transaction_id}")

    # Persist initial state
    await self._persist_state()

    return {
        "status": "success",
        "transaction_id": self.transaction_id,
        "transaction_status": self.status.value,
        "transaction_name": self.transaction_name,
        "created_at": self.created_at.isoformat(),
    }
454
+
455
async def _add_participant(self, **kwargs) -> Dict[str, Any]:
    """Register a participant on this transaction.

    The participant description is read from the ``participant`` keyword
    when present, otherwise from the keyword arguments themselves.

    Returns:
        ``status`` ``"exists"`` when the id is already registered (nothing
        is changed), otherwise ``"success"`` with the new participant count.

    Raises:
        NodeExecutionError: If no ``participant_id`` is supplied.
    """
    spec = kwargs.get("participant", kwargs)  # nested or flat form

    if not spec.get("participant_id"):
        raise NodeExecutionError(
            "participant_id is required for add_participant operation"
        )

    candidate = ParticipantCapability.from_dict(spec)
    new_id = candidate.participant_id

    # Duplicate ids are reported, not treated as an error.
    if any(new_id == known.participant_id for known in self.participants):
        logger.warning(f"Participant {new_id} already exists")
        return {
            "status": "exists",
            "transaction_id": self.transaction_id,
            "participant_id": new_id,
        }

    self.participants.append(candidate)
    logger.info(
        f"Added participant {new_id} to transaction {self.transaction_id}"
    )

    await self._persist_state()

    return {
        "status": "success",
        "transaction_id": self.transaction_id,
        "participant_id": candidate.participant_id,
        "total_participants": len(self.participants),
    }
492
+
493
async def _execute_transaction(self, **kwargs) -> Dict[str, Any]:
    """Execute the distributed transaction with the selected pattern.

    If no pattern has been selected yet, one is taken from the ``pattern``
    keyword, falling back to ``default_pattern`` when it is missing or
    ``None``. ``AUTO`` is resolved through ``_select_optimal_pattern``; an
    explicit pattern is checked against the participants' capabilities.

    Returns:
        On completion, a dictionary with the coordinator's ``status``, the
        resulting ``transaction_status``, the ``selected_pattern``, the
        participant count, ``execution_time`` in seconds and the raw
        coordinator ``result``. If anything raises during selection or
        execution, a ``"failed"`` dictionary carrying the ``error`` text.

    Raises:
        NodeExecutionError: If no participants have been added.
    """
    if not self.participants:
        raise NodeExecutionError("No participants defined for transaction")

    try:
        # Select pattern if not already selected
        if not self.selected_pattern:
            # ``pattern=None`` may be passed explicitly; treat it as "not
            # specified" so the default applies instead of leaving
            # ``selected_pattern`` unset.
            pattern = kwargs.get("pattern") or self.default_pattern
            if isinstance(pattern, str):
                pattern = TransactionPattern(pattern)

            if pattern == TransactionPattern.AUTO:
                self.selected_pattern = self._select_optimal_pattern()
            else:
                self.selected_pattern = pattern
                # Explicit choices must match what the participants support.
                self._validate_pattern_compatibility()

        # Update status
        self.status = TransactionStatus.RUNNING
        self.started_at = datetime.now(UTC)
        await self._persist_state()

        logger.info(
            f"Executing transaction {self.transaction_id} with pattern: {self.selected_pattern.value}"
        )

        # Create and execute appropriate coordinator
        if self.selected_pattern == TransactionPattern.SAGA:
            result = await self._execute_saga_pattern()
        elif self.selected_pattern == TransactionPattern.TWO_PHASE_COMMIT:
            result = await self._execute_2pc_pattern()
        else:
            raise NodeExecutionError(
                f"Unsupported transaction pattern: {self.selected_pattern}"
            )

        # Map the coordinator outcome onto the manager's status.
        outcome = result.get("status")
        if outcome == "success":
            # Any successful state other than "compensated" counts as committed.
            if result.get("state") == "compensated":
                self.status = TransactionStatus.COMPENSATED
            else:
                self.status = TransactionStatus.COMMITTED
        elif outcome == "aborted":
            self.status = TransactionStatus.ABORTED
        else:
            self.status = TransactionStatus.FAILED
            self.error_message = result.get(
                "error", "Transaction execution failed"
            )

        self.completed_at = datetime.now(UTC)
        await self._persist_state()

        return {
            "status": result.get("status", "failed"),
            "transaction_id": self.transaction_id,
            "transaction_status": self.status.value,
            "selected_pattern": self.selected_pattern.value,
            "participants": len(self.participants),
            "execution_time": (self.completed_at - self.started_at).total_seconds(),
            "result": result,
        }

    except Exception as e:
        logger.error(f"Transaction execution failed: {e}")
        self.status = TransactionStatus.FAILED
        self.error_message = str(e)
        self.completed_at = datetime.now(UTC)
        await self._persist_state()

        return {
            "status": "failed",
            "transaction_id": self.transaction_id,
            "transaction_status": self.status.value,
            "selected_pattern": (
                self.selected_pattern.value if self.selected_pattern else None
            ),
            "error": str(e),
        }
576
+
577
def _select_optimal_pattern(self) -> TransactionPattern:
    """Select a transaction pattern from requirements and capabilities.

    Rules, in order:
        1. ``IMMEDIATE`` consistency requires two-phase commit and fails if
           any participant lacks 2PC support.
        2. ``STRONG`` consistency picks two-phase commit when every
           participant supports it and availability is not ``HIGH``.
        3. Everything else uses Saga.

    When no requirements have been set, the ``TransactionRequirements``
    defaults are used.

    Returns:
        The selected ``TransactionPattern``.

    Raises:
        NodeExecutionError: If immediate consistency is required but some
            participants do not support 2PC.
    """
    # Requirements are only populated by create_transaction; fall back to
    # defaults rather than dereferencing None.
    requirements = self.requirements or TransactionRequirements()

    lacking_2pc = [
        p.participant_id for p in self.participants if not p.supports_2pc
    ]
    all_support_2pc = not lacking_2pc

    if requirements.consistency == ConsistencyLevel.IMMEDIATE:
        # Immediate consistency requires 2PC
        if lacking_2pc:
            raise NodeExecutionError(
                "Immediate consistency requires all participants to support 2PC, "
                f"but participants {lacking_2pc} do not"
            )
        return TransactionPattern.TWO_PHASE_COMMIT

    requires_high_availability = (
        requirements.availability == AvailabilityLevel.HIGH
    )
    if (
        requirements.consistency == ConsistencyLevel.STRONG
        and all_support_2pc
        and not requires_high_availability
    ):
        # Strong consistency preferred with 2PC support and availability not critical
        return TransactionPattern.TWO_PHASE_COMMIT

    # High availability, missing 2PC support, or eventual consistency.
    return TransactionPattern.SAGA
616
+
617
def _validate_pattern_compatibility(self):
    """Check that every participant supports the selected pattern.

    Raises:
        NodeExecutionError: Naming the participants that do not support the
            selected pattern (2PC or Saga). Other patterns are not checked.
    """
    chosen = self.selected_pattern

    if chosen == TransactionPattern.TWO_PHASE_COMMIT:
        rejected = [
            member.participant_id
            for member in self.participants
            if not member.supports_2pc
        ]
        if rejected:
            raise NodeExecutionError(
                f"2PC pattern selected but participants {rejected} do not support 2PC"
            )
        return

    if chosen == TransactionPattern.SAGA:
        rejected = [
            member.participant_id
            for member in self.participants
            if not member.supports_saga
        ]
        if rejected:
            raise NodeExecutionError(
                f"Saga pattern selected but participants {rejected} do not support Saga"
            )
636
+
637
async def _execute_saga_pattern(self) -> Dict[str, Any]:
    """Run the transaction through a saga coordinator.

    Creates a ``SagaCoordinatorNode`` and makes it the active coordinator,
    opens the saga with the current context, registers one step (with its
    compensation) per participant in ascending ``priority`` order, and then
    executes the saga.

    Returns:
        The result of the coordinator's ``execute_saga`` operation.
    """
    from .saga_coordinator import SagaCoordinatorNode

    requirements = self.requirements
    saga_timeout = requirements.timeout if requirements else self.default_timeout

    coordinator = SagaCoordinatorNode(
        saga_name=self.transaction_name,
        saga_id=self.transaction_id,
        timeout=saga_timeout,
        state_storage=self.state_storage,
        storage_config=self.storage_config,
    )
    self._saga_coordinator = coordinator
    self._active_coordinator = coordinator

    await coordinator.async_run(operation="create_saga", context=self.context)

    # Steps are registered lowest priority value first (stable for ties).
    ordered = list(self.participants)
    ordered.sort(key=lambda member: member.priority)
    for member in ordered:
        pid = member.participant_id
        step_parameters = {
            "endpoint": member.endpoint,
            "timeout": member.timeout,
            "retry_count": member.retry_count,
        }
        undo_parameters = {
            # Participants without a compensation action get "rollback".
            "action": member.compensation_action or "rollback",
            "endpoint": member.endpoint,
        }
        await coordinator.async_run(
            operation="add_step",
            name=f"step_{pid}",
            node_id=f"ParticipantNode_{pid}",
            parameters=step_parameters,
            compensation_node_id=f"CompensationNode_{pid}",
            compensation_parameters=undo_parameters,
        )

    return await coordinator.async_run(operation="execute_saga")
678
+
679
async def _execute_2pc_pattern(self) -> Dict[str, Any]:
    """Run the transaction through a two-phase commit coordinator.

    Creates a ``TwoPhaseCommitCoordinatorNode`` and makes it the active
    coordinator, begins the transaction with the current context, registers
    every participant in list order, and then executes the transaction.

    Returns:
        The result of the coordinator's ``execute_transaction`` operation.
    """
    from .two_phase_commit import TwoPhaseCommitCoordinatorNode

    requirements = self.requirements
    commit_timeout = (
        requirements.timeout if requirements else self.default_timeout
    )

    coordinator = TwoPhaseCommitCoordinatorNode(
        transaction_name=self.transaction_name,
        transaction_id=self.transaction_id,
        timeout=commit_timeout,
        state_storage=self.state_storage,
        storage_config=self.storage_config,
    )
    self._2pc_coordinator = coordinator
    self._active_coordinator = coordinator

    await coordinator.async_run(
        operation="begin_transaction", context=self.context
    )

    for member in self.participants:
        await coordinator.async_run(
            operation="add_participant",
            participant_id=member.participant_id,
            endpoint=member.endpoint,
        )

    return await coordinator.async_run(operation="execute_transaction")
710
+
711
+ async def _get_status(self) -> Dict[str, Any]:
712
+ """Get current transaction status."""
713
+ participant_info = [p.to_dict() for p in self.participants]
714
+
715
+ result = {
716
+ "status": "success",
717
+ "transaction_id": self.transaction_id,
718
+ "transaction_name": self.transaction_name,
719
+ "transaction_status": self.status.value,
720
+ "selected_pattern": (
721
+ self.selected_pattern.value if self.selected_pattern else None
722
+ ),
723
+ "participants": participant_info,
724
+ "context": self.context,
725
+ "created_at": self.created_at.isoformat() if self.created_at else None,
726
+ "started_at": self.started_at.isoformat() if self.started_at else None,
727
+ "completed_at": (
728
+ self.completed_at.isoformat() if self.completed_at else None
729
+ ),
730
+ }
731
+
732
+ # Add requirements info
733
+ if self.requirements:
734
+ result["requirements"] = {
735
+ "consistency": self.requirements.consistency.value,
736
+ "availability": self.requirements.availability.value,
737
+ "timeout": self.requirements.timeout,
738
+ "isolation_level": self.requirements.isolation_level,
739
+ "durability": self.requirements.durability,
740
+ "allow_partial_failure": self.requirements.allow_partial_failure,
741
+ }
742
+
743
+ # Add active coordinator status if available
744
+ if self._active_coordinator:
745
+ try:
746
+ coordinator_status = await self._active_coordinator.async_run(
747
+ operation="get_status"
748
+ )
749
+ result["coordinator_status"] = coordinator_status
750
+ except Exception as e:
751
+ logger.warning(f"Failed to get coordinator status: {e}")
752
+
753
+ if self.error_message:
754
+ result["error"] = self.error_message
755
+
756
+ return result
757
+
758
async def _abort_transaction(self) -> Dict[str, Any]:
    """Abort the transaction unless it has already finished.

    A transaction that is committed, aborted or compensated is left alone
    and reported as ``"already_finished"``. Otherwise the active
    coordinator (if any) is asked to abort, the status becomes ``ABORTED``
    and the state is persisted.

    Returns:
        A dictionary with ``status``, ``transaction_id`` and
        ``transaction_status``, plus ``aborted_at`` when an abort happened.
    """
    finished_states = (
        TransactionStatus.COMMITTED,
        TransactionStatus.ABORTED,
        TransactionStatus.COMPENSATED,
    )
    if self.status in finished_states:
        return {
            "status": "already_finished",
            "transaction_id": self.transaction_id,
            "transaction_status": self.status.value,
        }

    logger.info(f"Aborting transaction {self.transaction_id}")

    coordinator = self._active_coordinator
    if coordinator:
        # A coordinator failure is logged; the manager still marks the abort.
        try:
            await coordinator.async_run(operation="abort_transaction")
        except Exception as e:
            logger.warning(f"Failed to abort coordinator: {e}")

    self.status = TransactionStatus.ABORTED
    self.completed_at = datetime.now(UTC)
    await self._persist_state()

    outcome = {
        "status": "success",
        "transaction_id": self.transaction_id,
        "transaction_status": self.status.value,
    }
    outcome["aborted_at"] = self.completed_at.isoformat()
    return outcome
790
+
791
async def _recover_transaction(self, **kwargs) -> Dict[str, Any]:
    """Reload a transaction from persistent state and rebuild its coordinator.

    The ``transaction_id`` keyword selects the transaction to load and
    defaults to this node's own id.

    Returns:
        The same dictionary ``_get_status`` returns after recovery.

    Raises:
        NodeExecutionError: If no state storage is available or the
            transaction cannot be found in it.
    """
    target_id = kwargs.get("transaction_id", self.transaction_id)

    # Resolve the storage on first use.
    if not self._storage:
        self._storage = await self._get_storage()
    if not self._storage:
        raise NodeExecutionError("State storage not configured for recovery")

    saved = await self._storage.load_state(target_id)
    if not saved:
        raise NodeExecutionError(f"Transaction {target_id} not found")

    self._restore_from_state(saved)

    logger.info(
        f"Recovered transaction {target_id} with pattern {self.selected_pattern}"
    )

    # Rebuild the coordinator matching the restored pattern, if there is one.
    if self.selected_pattern == TransactionPattern.SAGA:
        from .saga_coordinator import SagaCoordinatorNode

        saga = SagaCoordinatorNode(
            saga_name=self.transaction_name,
            saga_id=target_id,
            state_storage=self.state_storage,
            storage_config=self.storage_config,
        )
        self._saga_coordinator = saga
        self._active_coordinator = saga

        # A failed load is only logged; status is still reported below.
        load_result = await saga.async_run(
            operation="load_saga", saga_id=target_id
        )
        if load_result.get("status") != "success":
            logger.warning(f"Failed to load saga state: {load_result}")
    elif self.selected_pattern == TransactionPattern.TWO_PHASE_COMMIT:
        from .two_phase_commit import TwoPhaseCommitCoordinatorNode

        # No explicit load step here: the coordinator is created with the
        # recovered transaction id.
        two_phase = TwoPhaseCommitCoordinatorNode(
            transaction_name=self.transaction_name,
            transaction_id=target_id,
            state_storage=self.state_storage,
            storage_config=self.storage_config,
        )
        self._2pc_coordinator = two_phase
        self._active_coordinator = two_phase

    return await self._get_status()
846
+
847
async def _list_transactions(self, **kwargs) -> Dict[str, Any]:
    """Return the transaction IDs known to the state storage.

    Keyword Args:
        filter: Criteria forwarded unchanged to the storage's ``list_sagas``;
            an empty dict when omitted.

    Returns:
        A dict with ``status``, ``transactions`` and ``count``. Without a
        storage backend the list is empty and the status is ``"success"``.
        If the lookup raises, the status is ``"error"`` and ``error`` holds
        the exception text.
    """
    storage = self._storage
    if not storage:
        # Create the backend lazily and remember it on the instance.
        storage = self._storage = await self._get_storage()
    if not storage:
        return {"status": "success", "transactions": [], "count": 0}

    criteria = kwargs.get("filter", {})
    try:
        found = await storage.list_sagas(criteria)
        listing = {
            "status": "success",
            "transactions": found,
            "count": len(found),
        }
    except Exception as exc:
        # Best effort: report the failure to the caller instead of raising.
        logger.error(f"Failed to list transactions: {exc}")
        return {"status": "error", "error": str(exc), "transactions": [], "count": 0}
    return listing
868
+
869
async def _persist_state(self):
    """Save the current transaction snapshot to the state storage.

    The storage backend is obtained through ``_get_storage()`` on first use
    and cached on ``self._storage``. When no backend is available the call
    does nothing.
    """
    storage = self._storage
    if not storage:
        storage = self._storage = await self._get_storage()
    if not storage:
        return

    snapshot = self._get_state_data()
    await storage.save_state(self.transaction_id, snapshot)
877
+
878
async def _get_storage(self):
    """Create the state-storage backend selected by ``self.state_storage``.

    ``"memory"`` yields an in-memory store. ``"redis"`` and ``"database"``
    need a ``redis_client`` / ``db_pool`` entry in ``self.storage_config``;
    when it is missing, or when the storage type is not recognised, a warning
    is logged and an in-memory store is returned instead.

    Returns:
        A new storage instance; this method does not cache it.
    """
    from .saga_state_storage import InMemoryStateStorage

    backend = self.state_storage
    if backend == "memory":
        return InMemoryStateStorage()

    if backend == "redis":
        from .saga_state_storage import RedisStateStorage

        redis_client = self.storage_config.get("redis_client")
        if redis_client:
            key_prefix = self.storage_config.get("key_prefix", "dtx:state:")
            return RedisStateStorage(redis_client, key_prefix)
        logger.warning("Redis client not provided, using memory storage")
        return InMemoryStateStorage()

    if backend == "database":
        from .saga_state_storage import DatabaseStateStorage

        db_pool = self.storage_config.get("db_pool")
        if db_pool:
            table_name = self.storage_config.get(
                "table_name", "distributed_transaction_states"
            )
            return DatabaseStateStorage(db_pool, table_name)
        logger.warning("Database pool not provided, using memory storage")
        return InMemoryStateStorage()

    # Anything else falls back to memory rather than failing.
    logger.warning(f"Unknown storage type: {backend}, using memory")
    return InMemoryStateStorage()
914
+
915
def _get_state_data(self) -> Dict[str, Any]:
    """Return the current transaction state as a dict for persistence.

    ``status`` and ``selected_pattern`` are stored by their ``.value``,
    participants through their ``to_dict()``, and timestamps as ISO-8601
    strings (``None`` when unset).

    The requirement levels (``consistency`` / ``availability``) are accepted
    either as enum members or as already-plain values. ``_restore_from_state``
    hands the stored values straight back to ``TransactionRequirements``, so
    reading ``.value`` unconditionally could raise ``AttributeError`` when a
    recovered transaction is persisted again.

    Returns:
        A dict with the keys ``transaction_id``, ``transaction_name``,
        ``status``, ``selected_pattern``, ``participants``, ``requirements``,
        ``context``, ``default_timeout``, ``created_at``, ``started_at``,
        ``completed_at`` and ``error_message``.
    """

    def _plain(level):
        # Enum members expose ``.value``; anything else is passed through.
        return getattr(level, "value", level)

    def _iso(moment):
        return moment.isoformat() if moment else None

    requirements = self.requirements
    if requirements:
        requirements_data = {
            "consistency": _plain(requirements.consistency),
            "availability": _plain(requirements.availability),
            "timeout": requirements.timeout,
            "isolation_level": requirements.isolation_level,
            "durability": requirements.durability,
            "allow_partial_failure": requirements.allow_partial_failure,
        }
    else:
        requirements_data = None

    return {
        "transaction_id": self.transaction_id,
        "transaction_name": self.transaction_name,
        "status": self.status.value,
        "selected_pattern": (
            self.selected_pattern.value if self.selected_pattern else None
        ),
        "participants": [p.to_dict() for p in self.participants],
        "requirements": requirements_data,
        "context": self.context,
        "default_timeout": self.default_timeout,
        "created_at": _iso(self.created_at),
        "started_at": _iso(self.started_at),
        "completed_at": _iso(self.completed_at),
        "error_message": self.error_message,
    }
946
+
947
def _restore_from_state(self, state_data: Dict[str, Any]):
    """Load this node's transaction fields from a persisted state dict.

    Args:
        state_data: A dict containing ``transaction_id``,
            ``transaction_name`` and ``status``. The remaining keys are
            optional: ``selected_pattern``, ``requirements`` and the
            timestamps are only applied when present and truthy, so the
            current attribute value is kept otherwise.

    Raises:
        KeyError: If a required key is missing from ``state_data``, or from
            its ``requirements`` entry.
    """
    self.transaction_id = state_data["transaction_id"]
    self.transaction_name = state_data["transaction_name"]
    self.status = TransactionStatus(state_data["status"])

    pattern_value = state_data.get("selected_pattern")
    if pattern_value:
        self.selected_pattern = TransactionPattern(pattern_value)

    # Rebuild the participant list from scratch.
    restored = self.participants = []
    restored.extend(
        ParticipantCapability.from_dict(entry)
        for entry in state_data.get("participants", [])
    )

    req_data = state_data.get("requirements")
    if req_data:
        # NOTE(review): consistency/availability are passed on exactly as
        # stored, while _get_state_data reads ``.value`` from them -- confirm
        # TransactionRequirements coerces these back to enum members.
        requirement_fields = (
            "consistency",
            "availability",
            "timeout",
            "isolation_level",
            "durability",
            "allow_partial_failure",
        )
        self.requirements = TransactionRequirements(
            **{name: req_data[name] for name in requirement_fields}
        )

    self.context = state_data.get("context", {})
    self.default_timeout = state_data.get("default_timeout", self.default_timeout)
    self.error_message = state_data.get("error_message")

    # Timestamps are stored as ISO-8601 strings; absent ones are left as is.
    for stamp in ("created_at", "started_at", "completed_at"):
        raw = state_data.get(stamp)
        if raw:
            setattr(self, stamp, datetime.fromisoformat(raw))