agent_hypervisor 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. agent_hypervisor-3.1.0.dist-info/METADATA +824 -0
  2. agent_hypervisor-3.1.0.dist-info/RECORD +60 -0
  3. agent_hypervisor-3.1.0.dist-info/WHEEL +4 -0
  4. agent_hypervisor-3.1.0.dist-info/entry_points.txt +2 -0
  5. agent_hypervisor-3.1.0.dist-info/licenses/LICENSE +21 -0
  6. hypervisor/__init__.py +160 -0
  7. hypervisor/api/__init__.py +7 -0
  8. hypervisor/api/models.py +285 -0
  9. hypervisor/api/server.py +742 -0
  10. hypervisor/audit/__init__.py +4 -0
  11. hypervisor/audit/commitment.py +76 -0
  12. hypervisor/audit/delta.py +135 -0
  13. hypervisor/audit/gc.py +99 -0
  14. hypervisor/cli/__init__.py +3 -0
  15. hypervisor/cli/formatters.py +99 -0
  16. hypervisor/cli/session_commands.py +200 -0
  17. hypervisor/constants.py +106 -0
  18. hypervisor/core.py +352 -0
  19. hypervisor/integrations/__init__.py +10 -0
  20. hypervisor/integrations/iatp_adapter.py +142 -0
  21. hypervisor/integrations/nexus_adapter.py +108 -0
  22. hypervisor/integrations/verification_adapter.py +122 -0
  23. hypervisor/liability/__init__.py +142 -0
  24. hypervisor/liability/attribution.py +86 -0
  25. hypervisor/liability/ledger.py +121 -0
  26. hypervisor/liability/quarantine.py +119 -0
  27. hypervisor/liability/slashing.py +80 -0
  28. hypervisor/liability/vouching.py +134 -0
  29. hypervisor/models.py +277 -0
  30. hypervisor/observability/__init__.py +27 -0
  31. hypervisor/observability/causal_trace.py +70 -0
  32. hypervisor/observability/event_bus.py +222 -0
  33. hypervisor/observability/prometheus_collector.py +248 -0
  34. hypervisor/observability/saga_span_exporter.py +341 -0
  35. hypervisor/providers.py +121 -0
  36. hypervisor/py.typed +0 -0
  37. hypervisor/reversibility/__init__.py +3 -0
  38. hypervisor/reversibility/registry.py +108 -0
  39. hypervisor/rings/__init__.py +21 -0
  40. hypervisor/rings/breach_detector.py +200 -0
  41. hypervisor/rings/classifier.py +78 -0
  42. hypervisor/rings/elevation.py +219 -0
  43. hypervisor/rings/enforcer.py +97 -0
  44. hypervisor/saga/__init__.py +22 -0
  45. hypervisor/saga/checkpoint.py +110 -0
  46. hypervisor/saga/dsl.py +190 -0
  47. hypervisor/saga/fan_out.py +126 -0
  48. hypervisor/saga/orchestrator.py +229 -0
  49. hypervisor/saga/schema.py +244 -0
  50. hypervisor/saga/state_machine.py +157 -0
  51. hypervisor/security/__init__.py +13 -0
  52. hypervisor/security/kill_switch.py +200 -0
  53. hypervisor/security/rate_limiter.py +190 -0
  54. hypervisor/session/__init__.py +194 -0
  55. hypervisor/session/intent_locks.py +118 -0
  56. hypervisor/session/isolation.py +37 -0
  57. hypervisor/session/sso.py +169 -0
  58. hypervisor/session/vector_clock.py +118 -0
  59. hypervisor/verification/__init__.py +3 -0
  60. hypervisor/verification/history.py +173 -0
@@ -0,0 +1,110 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+ # Public Preview — basic implementation
4
+ """
5
+ Execution Checkpoints — stub implementation.
6
+
7
+ Public Preview: checkpoints are recorded but replay/skip logic is removed.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import hashlib
13
+ import uuid
14
+ from dataclasses import dataclass, field
15
+ from datetime import UTC, datetime
16
+ from typing import Any
17
+
18
+
19
+ @dataclass
20
+ class SemanticCheckpoint:
21
+ """A checkpoint record (Public Preview: stored but not used for replay)."""
22
+
23
+ checkpoint_id: str = field(default_factory=lambda: f"ckpt:{uuid.uuid4().hex[:8]}")
24
+ saga_id: str = ""
25
+ step_id: str = ""
26
+ goal_description: str = ""
27
+ goal_hash: str = ""
28
+ achieved_at: datetime = field(default_factory=lambda: datetime.now(UTC))
29
+ state_snapshot: dict[str, Any] = field(default_factory=dict)
30
+ is_valid: bool = True
31
+ invalidated_reason: str | None = None
32
+
33
+ @staticmethod
34
+ def compute_goal_hash(goal: str, step_id: str) -> str:
35
+ """Compute deterministic hash for a goal."""
36
+ content = f"{goal}:{step_id}"
37
+ return hashlib.sha256(content.encode()).hexdigest()[:16]
38
+
39
+
40
class CheckpointManager:
    """
    Checkpoint stub (Public Preview: saves checkpoints but no replay logic).

    Checkpoints are kept in memory, grouped per saga. The lookup, skip and
    invalidation entry points exist for API compatibility and are no-ops.
    """

    def __init__(self) -> None:
        # saga_id -> checkpoints in the order they were saved.
        self._checkpoints: dict[str, list[SemanticCheckpoint]] = {}
        # goal_hash -> most recently saved checkpoint with that hash.
        self._by_goal_hash: dict[str, SemanticCheckpoint] = {}

    def save(
        self,
        saga_id: str,
        step_id: str,
        goal_description: str,
        state_snapshot: dict | None = None,
    ) -> SemanticCheckpoint:
        """Save a checkpoint record and return it.

        Args:
            saga_id: Saga the checkpoint belongs to.
            step_id: Step whose goal was reached.
            goal_description: Human-readable goal; hashed together with step_id.
            state_snapshot: Optional state to store with the checkpoint.

        Returns:
            The newly created checkpoint.
        """
        goal_hash = SemanticCheckpoint.compute_goal_hash(goal_description, step_id)
        checkpoint = SemanticCheckpoint(
            saga_id=saga_id,
            step_id=step_id,
            goal_description=goal_description,
            goal_hash=goal_hash,
            # Shallow-copy so later mutation of the caller's dict cannot
            # silently rewrite what was recorded as a snapshot.
            state_snapshot=dict(state_snapshot) if state_snapshot else {},
        )
        self._checkpoints.setdefault(saga_id, []).append(checkpoint)
        self._by_goal_hash[goal_hash] = checkpoint
        return checkpoint

    def is_achieved(
        self,
        saga_id: str,
        goal_description: str,
        step_id: str,
    ) -> bool:
        """Always returns False (Public Preview: no skip-on-replay)."""
        return False

    def get_checkpoint(
        self,
        saga_id: str,
        goal_description: str,
        step_id: str,
    ) -> SemanticCheckpoint | None:
        """Returns None (Public Preview: no replay support)."""
        return None

    def invalidate(
        self,
        saga_id: str,
        step_id: str,
        reason: str = "",
    ) -> int:
        """No-op in Public Preview; always reports zero invalidated checkpoints."""
        return 0

    def get_saga_checkpoints(self, saga_id: str) -> list[SemanticCheckpoint]:
        """Return a copy of the checkpoint list for a saga (empty if unknown)."""
        return list(self._checkpoints.get(saga_id, []))

    def get_replay_plan(self, saga_id: str, steps: list[str]) -> list[str]:
        """All steps need execution (Public Preview: no skip logic)."""
        return list(steps)

    @property
    def total_checkpoints(self) -> int:
        """Number of checkpoints stored across all sagas."""
        return sum(len(v) for v in self._checkpoints.values())

    @property
    def valid_checkpoints(self) -> int:
        """Number of stored checkpoints whose ``is_valid`` flag is set.

        ``invalidate`` is a no-op, so this equals ``total_checkpoints`` unless
        a caller cleared ``is_valid`` on a record directly.
        """
        return sum(
            1
            for saga_checkpoints in self._checkpoints.values()
            for checkpoint in saga_checkpoints
            if checkpoint.is_valid
        )
hypervisor/saga/dsl.py ADDED
@@ -0,0 +1,190 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+ # Public Preview — basic implementation
4
+ """
5
+ Declarative Saga DSL — stub implementation.
6
+
7
+ Public Preview: DSL parsing is retained for basic step definitions only.
8
+ Fan-out groups in DSL are ignored (sequential execution only).
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import uuid
14
+ from dataclasses import dataclass, field
15
+ from typing import Any
16
+
17
+ from hypervisor.saga.fan_out import FanOutPolicy
18
+ from hypervisor.saga.schema import SagaSchemaValidator
19
+ from hypervisor.saga.state_machine import SagaStep
20
+
21
+
22
@dataclass
class SagaDSLStep:
    """One step of a saga as described by a DSL definition."""

    # Identity of the step and of the action it performs.
    id: str = ""
    action_id: str = ""
    # Agent named by the definition; passed on as ``agent_did`` on conversion.
    agent: str = ""

    # Forward and (optional) compensating API names.
    execute_api: str = ""
    undo_api: str | None = None

    # Execution limits; passed on as ``timeout_seconds`` / ``max_retries``.
    timeout: int = 300
    retries: int = 0

    # Optional checkpoint goal text taken verbatim from the definition.
    checkpoint_goal: str | None = None
34
+
35
+
36
@dataclass
class SagaDSLFanOut:
    """Fan-out group declared in a DSL definition.

    Public Preview: the group is ignored during execution.
    """

    # Success policy for the group; defaults to requiring every branch.
    policy: FanOutPolicy = FanOutPolicy.ALL_MUST_SUCCEED
    # IDs of the steps that make up the group's branches.
    branch_step_ids: list[str] = field(default_factory=list)
42
+
43
+
44
@dataclass
class SagaDefinition:
    """Fully parsed saga: identity, ordered steps and free-form metadata."""

    name: str = ""
    session_id: str = ""
    # Random identifier of the form ``saga:<8 hex digits>`` unless supplied.
    saga_id: str = field(default_factory=lambda: f"saga:{uuid.uuid4().hex[:8]}")
    steps: list[SagaDSLStep] = field(default_factory=list)
    fan_outs: list[SagaDSLFanOut] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def step_ids(self) -> list[str]:
        """IDs of all steps, in definition order."""
        ids = []
        for step in self.steps:
            ids.append(step.id)
        return ids

    @property
    def fan_out_step_ids(self) -> set[str]:
        """Always an empty set: no step is treated as part of a fan-out."""
        return set()

    @property
    def sequential_steps(self) -> list[SagaDSLStep]:
        """All steps are sequential in Public Preview (returned as a new list)."""
        return [*self.steps]
67
+
68
+
69
class SagaDSLParser:
    """
    Parses saga definitions from dict.

    Public Preview: fan-out groups are not parsed by ``parse`` (the resulting
    definition always has an empty ``fan_outs`` list).
    """

    def __init__(self, *, schema_validation: bool = False) -> None:
        # Only build the schema validator when the caller opted in.
        self._schema_validator = SagaSchemaValidator() if schema_validation else None

    @staticmethod
    def _is_mapping(value: Any) -> bool:
        """Return True when *value* is dict-like (any ``Mapping``)."""
        from collections.abc import Mapping

        return isinstance(value, Mapping)

    def _is_bad_steps_container(self, raw_steps: Any) -> bool:
        """Return True for containers that cannot be a sequence of step mappings."""
        return isinstance(raw_steps, (str, bytes)) or self._is_mapping(raw_steps)

    def parse(self, definition: dict[str, Any]) -> SagaDefinition:
        """Parse a saga definition dict into a SagaDefinition.

        If schema_validation was enabled at construction, validates against
        the JSON schema before parsing.

        Raises:
            SagaDSLError: If name, session_id or steps are missing, if steps
                is not a list of mappings, if a step lacks a required field,
                or if two steps share an ID.
        """
        if self._schema_validator is not None:
            self._schema_validator.validate_or_raise(definition)

        name = definition.get("name", "")
        if not name:
            raise SagaDSLError("Saga definition must have a 'name'")

        session_id = definition.get("session_id", "")
        if not session_id:
            raise SagaDSLError("Saga definition must have a 'session_id'")

        raw_steps = definition.get("steps", [])
        if not raw_steps:
            raise SagaDSLError("Saga must have at least one step")
        if self._is_bad_steps_container(raw_steps):
            # A string or mapping would otherwise be iterated element-wise
            # and fail with an unrelated AttributeError.
            raise SagaDSLError("Saga 'steps' must be a list")

        steps = []
        seen_ids = set()
        for raw in raw_steps:
            step = self._parse_step(raw)
            if step.id in seen_ids:
                raise SagaDSLError(f"Duplicate step ID: {step.id}")
            seen_ids.add(step.id)
            steps.append(step)

        return SagaDefinition(
            name=name,
            session_id=session_id,
            # Treat an empty/None saga_id as absent, as done for name and
            # session_id, so a saga never ends up with a blank identifier.
            saga_id=definition.get("saga_id") or f"saga:{uuid.uuid4().hex[:8]}",
            steps=steps,
            fan_outs=[],
            # An explicit ``None`` would violate the declared dict type.
            metadata=definition.get("metadata") or {},
        )

    def _parse_step(self, raw: dict) -> SagaDSLStep:
        """Build a SagaDSLStep from one raw step mapping.

        Raises:
            SagaDSLError: If *raw* is not a mapping or lacks 'id',
                'action_id' or 'agent'.
        """
        if not self._is_mapping(raw):
            raise SagaDSLError("Each step must be a mapping")

        step_id = raw.get("id", "")
        if not step_id:
            raise SagaDSLError("Each step must have an 'id'")

        action_id = raw.get("action_id", "")
        if not action_id:
            raise SagaDSLError(f"Step {step_id} must have an 'action_id'")

        agent = raw.get("agent", "")
        if not agent:
            raise SagaDSLError(f"Step {step_id} must have an 'agent'")

        return SagaDSLStep(
            id=step_id,
            action_id=action_id,
            agent=agent,
            execute_api=raw.get("execute_api", ""),
            undo_api=raw.get("undo_api"),
            timeout=raw.get("timeout", 300),
            retries=raw.get("retries", 0),
            checkpoint_goal=raw.get("checkpoint_goal"),
        )

    def _parse_fan_out(self, raw: dict, valid_step_ids: set[str]) -> SagaDSLFanOut:
        """Parse fan-out definition (Public Preview: retained for API compat).

        The policy is always ALL_MUST_SUCCEED and ``valid_step_ids`` is not
        consulted.
        """
        return SagaDSLFanOut(
            policy=FanOutPolicy.ALL_MUST_SUCCEED,
            branch_step_ids=raw.get("branches", []),
        )

    def to_saga_steps(self, definition: SagaDefinition) -> list[SagaStep]:
        """Convert a SagaDefinition into SagaStep objects (checkpoint_goal is not carried over)."""
        return [
            SagaStep(
                step_id=s.id,
                action_id=s.action_id,
                agent_did=s.agent,
                execute_api=s.execute_api,
                undo_api=s.undo_api,
                timeout_seconds=s.timeout,
                max_retries=s.retries,
            )
            for s in definition.steps
        ]

    def validate(self, definition: dict[str, Any]) -> list[str]:
        """Validate a definition and return list of errors (empty = valid).

        Unlike ``parse`` this collects every problem instead of raising on
        the first one, including malformed ``steps`` containers and steps
        that are not mappings.
        """
        errors = []
        if not definition.get("name"):
            errors.append("Missing 'name'")
        if not definition.get("session_id"):
            errors.append("Missing 'session_id'")

        raw_steps = definition.get("steps")
        if not raw_steps:
            errors.append("Missing 'steps'")
            return errors
        if self._is_bad_steps_container(raw_steps):
            errors.append("'steps' must be a list")
            return errors

        seen_ids = set()
        for i, step in enumerate(raw_steps):
            if not self._is_mapping(step):
                # Report instead of crashing on ``step.get``.
                errors.append(f"Step {i} must be a mapping")
                continue
            if not step.get("id"):
                errors.append(f"Step {i} missing 'id'")
            elif step["id"] in seen_ids:
                errors.append(f"Duplicate step ID: {step['id']}")
            else:
                seen_ids.add(step["id"])
            if not step.get("action_id"):
                errors.append(f"Step {step.get('id', i)} missing 'action_id'")
            if not step.get("agent"):
                errors.append(f"Step {step.get('id', i)} missing 'agent'")
        return errors
187
+
188
+
189
class SagaDSLError(Exception):
    """Error signalling that a saga DSL definition is malformed or incomplete."""
@@ -0,0 +1,126 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+ # Public Preview — basic implementation
4
+ """
5
+ Parallel Saga Fan-Out — stub implementation.
6
+
7
+ Public Preview: only sequential ALL_MUST_SUCCEED execution.
8
+ Fan-out groups execute branches one at a time.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import asyncio
14
+ import uuid
15
+ from collections.abc import Callable
16
+ from dataclasses import dataclass, field
17
+ from enum import Enum
18
+ from typing import Any
19
+
20
+ from hypervisor.saga.state_machine import SagaStep, StepState
21
+
22
+
23
class FanOutPolicy(str, Enum):
    """Success policies a fan-out group can declare.

    Members are also ``str`` instances, so they compare equal to their
    string values.
    """

    ALL_MUST_SUCCEED = "all_must_succeed"
    MAJORITY_MUST_SUCCEED = "majority_must_succeed"
    ANY_MUST_SUCCEED = "any_must_succeed"
27
+
28
+
29
@dataclass
class FanOutBranch:
    """One branch of a fan-out group together with its execution outcome."""

    # Random identifier of the form ``branch:<8 hex digits>``.
    branch_id: str = field(default_factory=lambda: f"branch:{uuid.uuid4().hex[:8]}")
    # Step run by this branch; ``None`` until one is assigned.
    step: SagaStep | None = None

    # Outcome fields, populated when the branch is executed.
    result: Any = None
    error: str | None = None
    succeeded: bool = False
36
+
37
+
38
@dataclass
class FanOutGroup:
    """A set of branches that are resolved together under one policy."""

    # Random identifier of the form ``fanout:<8 hex digits>``.
    group_id: str = field(default_factory=lambda: f"fanout:{uuid.uuid4().hex[:8]}")
    saga_id: str = ""
    policy: FanOutPolicy = FanOutPolicy.ALL_MUST_SUCCEED
    branches: list[FanOutBranch] = field(default_factory=list)
    # Resolution bookkeeping.
    resolved: bool = False
    policy_satisfied: bool = False
    compensation_needed: list[str] = field(default_factory=list)

    @property
    def success_count(self) -> int:
        """Number of branches flagged as succeeded."""
        return len([branch for branch in self.branches if branch.succeeded])

    @property
    def failure_count(self) -> int:
        """Number of branches that did not succeed and carry a non-empty error."""
        return len(
            [branch for branch in self.branches if not branch.succeeded and branch.error]
        )

    @property
    def total_branches(self) -> int:
        """Number of branches in the group."""
        return len(self.branches)

    def check_policy(self) -> bool:
        """Public Preview: only ALL_MUST_SUCCEED is enforced.

        Returns True when every branch succeeded, regardless of ``policy``.
        """
        return self.total_branches == self.success_count
63
+
64
+
65
class FanOutOrchestrator:
    """Fan-out stub (Public Preview: sequential execution, ALL_MUST_SUCCEED only)."""

    def __init__(self) -> None:
        # group_id -> group, for every group created through this instance.
        self._groups: dict[str, FanOutGroup] = {}

    def create_group(self, saga_id: str, policy: FanOutPolicy = FanOutPolicy.ALL_MUST_SUCCEED) -> FanOutGroup:
        """Create and register a fan-out group for a saga.

        The ``policy`` argument is accepted for API compatibility but the
        group is always created with ALL_MUST_SUCCEED.
        """
        group = FanOutGroup(saga_id=saga_id, policy=FanOutPolicy.ALL_MUST_SUCCEED)
        self._groups[group.group_id] = group
        return group

    def add_branch(self, group_id: str, step: SagaStep) -> FanOutBranch:
        """Append a branch running *step* to the group.

        Raises:
            ValueError: If the group does not exist.
        """
        group = self._get_group(group_id)
        branch = FanOutBranch(step=step)
        group.branches.append(branch)
        return branch

    async def execute(
        self, group_id: str, executors: dict[str, Callable[..., Any]], timeout_seconds: int = 300,
    ) -> FanOutGroup:
        """Execute branches sequentially (Public Preview).

        Args:
            group_id: Group to run.
            executors: Maps step_id to a zero-argument callable returning an
                awaitable.
            timeout_seconds: Currently unused; each branch is bounded by its
                own step's ``timeout_seconds``.

        Returns:
            The group, marked resolved, with ``policy_satisfied`` set and
            ``compensation_needed`` listing succeeded steps when the policy
            was not satisfied.

        Raises:
            ValueError: If the group does not exist.
        """
        group = self._get_group(group_id)

        for branch in group.branches:
            step = branch.step
            # NOTE(review): a misconfigured branch is recorded and skipped;
            # unlike an execution failure it does not stop the loop.
            if not step:
                branch.error = "No step assigned"
                continue
            executor = executors.get(step.step_id)
            if not executor:
                branch.error = f"No executor for step {step.step_id}"
                continue
            try:
                step.transition(StepState.EXECUTING)
                result = await asyncio.wait_for(executor(), timeout=step.timeout_seconds)
                branch.result = result
                branch.succeeded = True
                step.execute_result = result
                step.transition(StepState.COMMITTED)
            except Exception as e:
                if isinstance(e, TimeoutError):
                    # wait_for's timeout carries no message; without this the
                    # error would be "" and failure_count would skip it.
                    message = f"Step {step.step_id} timed out after {step.timeout_seconds}s"
                else:
                    # Fall back to the type name for message-less exceptions.
                    message = str(e) or type(e).__name__
                branch.error = message
                step.error = message
                step.transition(StepState.FAILED)
                break  # ALL_MUST_SUCCEED: stop on first failure

        group.policy_satisfied = group.check_policy()
        group.resolved = True
        if not group.policy_satisfied:
            group.compensation_needed = [b.step.step_id for b in group.branches if b.succeeded and b.step]
        return group

    def get_group(self, group_id: str) -> FanOutGroup | None:
        """Return the group with this ID, or None if it is unknown."""
        return self._groups.get(group_id)

    def _get_group(self, group_id: str) -> FanOutGroup:
        """Return the group with this ID or raise ValueError."""
        group = self._groups.get(group_id)
        if not group:
            raise ValueError(f"Fan-out group {group_id} not found")
        return group

    @property
    def active_groups(self) -> list[FanOutGroup]:
        """Groups that have not been resolved yet."""
        return [g for g in self._groups.values() if not g.resolved]
@@ -0,0 +1,229 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+ # Public Preview — basic implementation
4
+ """
5
+ Semantic Saga Orchestrator
6
+
7
+ Sequential step execution with reverse-order compensation on failure.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import uuid
14
+ from collections.abc import Callable
15
+ from typing import Any
16
+
17
+ from hypervisor.constants import (
18
+ SAGA_DEFAULT_MAX_RETRIES,
19
+ SAGA_DEFAULT_RETRY_DELAY_SECONDS,
20
+ SAGA_DEFAULT_STEP_TIMEOUT_SECONDS,
21
+ )
22
+ from hypervisor.saga.state_machine import (
23
+ Saga,
24
+ SagaState,
25
+ SagaStateError,
26
+ SagaStep,
27
+ StepState,
28
+ )
29
+
30
+
31
class SagaTimeoutError(Exception):
    """Signals that a saga step did not finish within its timeout."""
33
+
34
+
35
class SagaOrchestrator:
    """
    Orchestrates multi-step agent transactions with saga semantics.

    Forward execution runs one step at a time with timeout and retry.
    On failure the caller invokes ``compensate``, which walks the saga's
    committed steps in reverse order and calls the supplied compensator
    for each. If any compensation fails, the saga is escalated and its
    error records that the Joint Liability penalty was triggered.
    """

    DEFAULT_MAX_RETRIES = SAGA_DEFAULT_MAX_RETRIES
    DEFAULT_RETRY_DELAY_SECONDS = SAGA_DEFAULT_RETRY_DELAY_SECONDS

    def __init__(self) -> None:
        # saga_id -> saga, for every saga created through this instance.
        self._sagas: dict[str, Saga] = {}

    def create_saga(self, session_id: str) -> Saga:
        """Create a new saga for a session and register it."""
        saga = Saga(
            saga_id=f"saga:{uuid.uuid4()}",
            session_id=session_id,
        )
        self._sagas[saga.saga_id] = saga
        return saga

    def add_step(
        self,
        saga_id: str,
        action_id: str,
        agent_did: str,
        execute_api: str,
        undo_api: str | None = None,
        timeout_seconds: int = SAGA_DEFAULT_STEP_TIMEOUT_SECONDS,
        max_retries: int = 0,
    ) -> SagaStep:
        """Add a step to a saga.

        Raises:
            SagaStateError: If the saga does not exist.
        """
        saga = self._get_saga(saga_id)
        step = SagaStep(
            step_id=f"step:{uuid.uuid4()}",
            action_id=action_id,
            agent_did=agent_did,
            execute_api=execute_api,
            undo_api=undo_api,
            timeout_seconds=timeout_seconds,
            max_retries=max_retries,
        )
        saga.steps.append(step)
        return step

    async def execute_step(
        self,
        saga_id: str,
        step_id: str,
        executor: Callable[..., Any],
    ) -> Any:
        """
        Execute a single saga step with timeout and retry support.

        Args:
            saga_id: Saga identifier
            step_id: Step identifier
            executor: Async callable that performs the action

        Returns:
            Result from the executor

        Raises:
            SagaStateError: If the saga or step is unknown, or the step
                cannot transition to EXECUTING
            SagaTimeoutError: If the final attempt exceeds the step timeout
            Exception: Whatever the executor raised on the final attempt
        """
        saga = self._get_saga(saga_id)
        step = self._get_step(saga, step_id)

        # Always make at least one attempt: a negative max_retries would
        # otherwise skip execution and end in a misleading "no error" failure.
        attempts = 1 + max(0, step.max_retries)
        last_error: Exception | None = None

        for attempt in range(attempts):
            step.retry_count = attempt
            step.transition(StepState.EXECUTING)
            try:
                result = await asyncio.wait_for(
                    executor(),
                    timeout=step.timeout_seconds,
                )
                step.execute_result = result
                step.transition(StepState.COMMITTED)
                return result
            except TimeoutError:
                last_error = SagaTimeoutError(
                    f"Step {step_id} timed out after {step.timeout_seconds}s "
                    f"(attempt {attempt + 1}/{attempts})"
                )
            except Exception as e:
                last_error = e

            # Shared failure path for timeouts and executor errors.
            step.error = str(last_error)
            step.transition(StepState.FAILED)
            if attempt < attempts - 1:
                # Reset to PENDING for retry (assigned directly, not via transition()).
                step.state = StepState.PENDING
                step.error = None
                # Linear backoff: the delay grows with the attempt number.
                await asyncio.sleep(
                    self.DEFAULT_RETRY_DELAY_SECONDS * (attempt + 1)
                )

        # All retries exhausted
        if last_error is not None:
            raise last_error
        raise SagaStateError("Step execution failed with no error captured")

    async def compensate(
        self,
        saga_id: str,
        compensator: Callable[[SagaStep], Any],
    ) -> list[SagaStep]:
        """
        Run compensation (rollback) for all committed steps in reverse order.

        Args:
            saga_id: Saga identifier
            compensator: Async callable that takes a SagaStep and calls its Undo_API

        Returns:
            List of steps that failed compensation (empty = full success)

        Raises:
            SagaStateError: If the saga does not exist
        """
        saga = self._get_saga(saga_id)
        saga.transition(SagaState.COMPENSATING)

        failed_compensations: list[SagaStep] = []

        for step in saga.committed_steps_reversed:
            if not step.undo_api:
                # Nothing to call: record the failure directly.
                step.state = StepState.COMPENSATION_FAILED
                step.error = "No Undo_API available"
                failed_compensations.append(step)
                continue

            step.transition(StepState.COMPENSATING)
            try:
                result = await asyncio.wait_for(
                    compensator(step),
                    timeout=step.timeout_seconds,
                )
                step.compensation_result = result
                step.transition(StepState.COMPENSATED)
            except TimeoutError:
                step.error = f"Compensation timed out after {step.timeout_seconds}s"
                step.transition(StepState.COMPENSATION_FAILED)
                failed_compensations.append(step)
            except Exception as e:
                step.error = f"Compensation failed: {e}"
                step.transition(StepState.COMPENSATION_FAILED)
                failed_compensations.append(step)

        if failed_compensations:
            saga.transition(SagaState.ESCALATED)
            saga.error = (
                f"{len(failed_compensations)} step(s) failed compensation — "
                "Joint Liability penalty triggered"
            )
        else:
            saga.transition(SagaState.COMPLETED)

        return failed_compensations

    def get_saga(self, saga_id: str) -> Saga | None:
        """Get a saga by ID, or None if it is unknown."""
        return self._sagas.get(saga_id)

    @property
    def active_sagas(self) -> list[Saga]:
        """Get all sagas that are currently RUNNING or COMPENSATING."""
        return [
            s for s in self._sagas.values()
            if s.state in (SagaState.RUNNING, SagaState.COMPENSATING)
        ]

    def _get_saga(self, saga_id: str) -> Saga:
        """Return the saga with this ID or raise SagaStateError."""
        saga = self._sagas.get(saga_id)
        if not saga:
            raise SagaStateError(f"Saga {saga_id} not found")
        return saga

    def _get_step(self, saga: Saga, step_id: str) -> SagaStep:
        """Return the step with this ID from the saga or raise SagaStateError."""
        for step in saga.steps:
            if step.step_id == step_id:
                return step
        raise SagaStateError(f"Step {step_id} not found in saga {saga.saga_id}")