agent_hypervisor 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_hypervisor-3.1.0.dist-info/METADATA +824 -0
- agent_hypervisor-3.1.0.dist-info/RECORD +60 -0
- agent_hypervisor-3.1.0.dist-info/WHEEL +4 -0
- agent_hypervisor-3.1.0.dist-info/entry_points.txt +2 -0
- agent_hypervisor-3.1.0.dist-info/licenses/LICENSE +21 -0
- hypervisor/__init__.py +160 -0
- hypervisor/api/__init__.py +7 -0
- hypervisor/api/models.py +285 -0
- hypervisor/api/server.py +742 -0
- hypervisor/audit/__init__.py +4 -0
- hypervisor/audit/commitment.py +76 -0
- hypervisor/audit/delta.py +135 -0
- hypervisor/audit/gc.py +99 -0
- hypervisor/cli/__init__.py +3 -0
- hypervisor/cli/formatters.py +99 -0
- hypervisor/cli/session_commands.py +200 -0
- hypervisor/constants.py +106 -0
- hypervisor/core.py +352 -0
- hypervisor/integrations/__init__.py +10 -0
- hypervisor/integrations/iatp_adapter.py +142 -0
- hypervisor/integrations/nexus_adapter.py +108 -0
- hypervisor/integrations/verification_adapter.py +122 -0
- hypervisor/liability/__init__.py +142 -0
- hypervisor/liability/attribution.py +86 -0
- hypervisor/liability/ledger.py +121 -0
- hypervisor/liability/quarantine.py +119 -0
- hypervisor/liability/slashing.py +80 -0
- hypervisor/liability/vouching.py +134 -0
- hypervisor/models.py +277 -0
- hypervisor/observability/__init__.py +27 -0
- hypervisor/observability/causal_trace.py +70 -0
- hypervisor/observability/event_bus.py +222 -0
- hypervisor/observability/prometheus_collector.py +248 -0
- hypervisor/observability/saga_span_exporter.py +341 -0
- hypervisor/providers.py +121 -0
- hypervisor/py.typed +0 -0
- hypervisor/reversibility/__init__.py +3 -0
- hypervisor/reversibility/registry.py +108 -0
- hypervisor/rings/__init__.py +21 -0
- hypervisor/rings/breach_detector.py +200 -0
- hypervisor/rings/classifier.py +78 -0
- hypervisor/rings/elevation.py +219 -0
- hypervisor/rings/enforcer.py +97 -0
- hypervisor/saga/__init__.py +22 -0
- hypervisor/saga/checkpoint.py +110 -0
- hypervisor/saga/dsl.py +190 -0
- hypervisor/saga/fan_out.py +126 -0
- hypervisor/saga/orchestrator.py +229 -0
- hypervisor/saga/schema.py +244 -0
- hypervisor/saga/state_machine.py +157 -0
- hypervisor/security/__init__.py +13 -0
- hypervisor/security/kill_switch.py +200 -0
- hypervisor/security/rate_limiter.py +190 -0
- hypervisor/session/__init__.py +194 -0
- hypervisor/session/intent_locks.py +118 -0
- hypervisor/session/isolation.py +37 -0
- hypervisor/session/sso.py +169 -0
- hypervisor/session/vector_clock.py +118 -0
- hypervisor/verification/__init__.py +3 -0
- hypervisor/verification/history.py +173 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
# Public Preview — basic implementation
|
|
4
|
+
"""
|
|
5
|
+
Execution Checkpoints — stub implementation.
|
|
6
|
+
|
|
7
|
+
Public Preview: checkpoints are recorded but replay/skip logic is removed.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import hashlib
|
|
13
|
+
import uuid
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from datetime import UTC, datetime
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
|
|
20
|
+
class SemanticCheckpoint:
|
|
21
|
+
"""A checkpoint record (Public Preview: stored but not used for replay)."""
|
|
22
|
+
|
|
23
|
+
checkpoint_id: str = field(default_factory=lambda: f"ckpt:{uuid.uuid4().hex[:8]}")
|
|
24
|
+
saga_id: str = ""
|
|
25
|
+
step_id: str = ""
|
|
26
|
+
goal_description: str = ""
|
|
27
|
+
goal_hash: str = ""
|
|
28
|
+
achieved_at: datetime = field(default_factory=lambda: datetime.now(UTC))
|
|
29
|
+
state_snapshot: dict[str, Any] = field(default_factory=dict)
|
|
30
|
+
is_valid: bool = True
|
|
31
|
+
invalidated_reason: str | None = None
|
|
32
|
+
|
|
33
|
+
@staticmethod
|
|
34
|
+
def compute_goal_hash(goal: str, step_id: str) -> str:
|
|
35
|
+
"""Compute deterministic hash for a goal."""
|
|
36
|
+
content = f"{goal}:{step_id}"
|
|
37
|
+
return hashlib.sha256(content.encode()).hexdigest()[:16]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class CheckpointManager:
    """
    In-memory checkpoint store (Public Preview stub).

    Checkpoints are recorded by ``save`` and can be listed per saga, but the
    lookup, invalidation and replay-planning methods are placeholders: no
    step is ever reported as already achieved.
    """

    def __init__(self) -> None:
        # saga_id -> checkpoints in the order they were saved.
        self._checkpoints: dict[str, list[SemanticCheckpoint]] = {}
        # goal_hash -> most recently saved checkpoint carrying that hash.
        self._by_goal_hash: dict[str, SemanticCheckpoint] = {}

    def save(
        self,
        saga_id: str,
        step_id: str,
        goal_description: str,
        state_snapshot: dict | None = None,
    ) -> SemanticCheckpoint:
        """Record a checkpoint for a saga step and return it.

        Args:
            saga_id: Saga the checkpoint belongs to.
            step_id: Step that reached the goal.
            goal_description: Free-text description of the goal.
            state_snapshot: Optional state to store with the checkpoint.
                A shallow copy is stored, so later changes to the caller's
                dict do not alter the recorded snapshot (nested objects are
                still shared).

        Returns:
            The newly created checkpoint record.
        """
        goal_hash = SemanticCheckpoint.compute_goal_hash(goal_description, step_id)
        checkpoint = SemanticCheckpoint(
            saga_id=saga_id,
            step_id=step_id,
            goal_description=goal_description,
            goal_hash=goal_hash,
            # Copy rather than alias: a snapshot must not change when the
            # caller keeps mutating its working state.
            state_snapshot=dict(state_snapshot) if state_snapshot else {},
        )
        self._checkpoints.setdefault(saga_id, []).append(checkpoint)
        self._by_goal_hash[goal_hash] = checkpoint
        return checkpoint

    def is_achieved(
        self,
        saga_id: str,
        goal_description: str,
        step_id: str,
    ) -> bool:
        """Always returns False (Public Preview: no skip-on-replay)."""
        return False

    def get_checkpoint(
        self,
        saga_id: str,
        goal_description: str,
        step_id: str,
    ) -> SemanticCheckpoint | None:
        """Always returns None (Public Preview: no replay support)."""
        return None

    def invalidate(
        self,
        saga_id: str,
        step_id: str,
        reason: str = "",
    ) -> int:
        """No-op in Public Preview; always reports 0 invalidated checkpoints."""
        return 0

    def get_saga_checkpoints(self, saga_id: str) -> list[SemanticCheckpoint]:
        """Return a new list of the checkpoints saved for ``saga_id``.

        An unknown saga yields an empty list.
        """
        return list(self._checkpoints.get(saga_id, []))

    def get_replay_plan(self, saga_id: str, steps: list[str]) -> list[str]:
        """Return a copy of ``steps``: every step needs execution (no skip logic)."""
        return list(steps)

    @property
    def total_checkpoints(self) -> int:
        """Number of checkpoints stored across all sagas."""
        return sum(len(v) for v in self._checkpoints.values())

    @property
    def valid_checkpoints(self) -> int:
        """Number of stored checkpoints whose ``is_valid`` flag is still set.

        ``invalidate`` is a no-op here, so this equals ``total_checkpoints``
        unless a caller has cleared ``is_valid`` on a record directly.
        """
        return sum(
            1
            for checkpoints in self._checkpoints.values()
            for checkpoint in checkpoints
            if checkpoint.is_valid
        )
|
hypervisor/saga/dsl.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
# Public Preview — basic implementation
|
|
4
|
+
"""
|
|
5
|
+
Declarative Saga DSL — stub implementation.
|
|
6
|
+
|
|
7
|
+
Public Preview: DSL parsing is retained for basic step definitions only.
|
|
8
|
+
Fan-out groups in DSL are ignored (sequential execution only).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import uuid
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from hypervisor.saga.fan_out import FanOutPolicy
|
|
18
|
+
from hypervisor.saga.schema import SagaSchemaValidator
|
|
19
|
+
from hypervisor.saga.state_machine import SagaStep
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class SagaDSLStep:
    """One step of a saga as described in a DSL definition."""

    # Identity of the step and of the action it performs.
    id: str = ""
    action_id: str = ""
    # Agent responsible for the step.
    agent: str = ""
    # API invoked to run the step, and optionally the API that undoes it.
    execute_api: str = ""
    undo_api: str | None = None
    # Execution limits: timeout (default 300) and number of retries.
    timeout: int = 300
    retries: int = 0
    # Optional goal text associated with a checkpoint for this step.
    checkpoint_goal: str | None = None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class SagaDSLFanOut:
    """Fan-out group declared in the DSL (Public Preview: ignored during execution)."""

    # Success policy for the group; defaults to requiring every branch to succeed.
    policy: FanOutPolicy = FanOutPolicy.ALL_MUST_SUCCEED
    # IDs of the steps that make up the group's branches.
    branch_step_ids: list[str] = field(default_factory=list)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class SagaDefinition:
    """Fully parsed saga: identity, ordered steps, fan-out groups and metadata."""

    name: str = ""
    session_id: str = ""
    # "saga:" followed by the first 8 hex characters of a random UUID4.
    saga_id: str = field(default_factory=lambda: f"saga:{uuid.uuid4().hex[:8]}")
    steps: list[SagaDSLStep] = field(default_factory=list)
    fan_outs: list[SagaDSLFanOut] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def step_ids(self) -> list[str]:
        """IDs of all steps, in definition order."""
        collected = []
        for entry in self.steps:
            collected.append(entry.id)
        return collected

    @property
    def fan_out_step_ids(self) -> set[str]:
        """Always an empty set: no step is treated as part of a fan-out."""
        return set()

    @property
    def sequential_steps(self) -> list[SagaDSLStep]:
        """A fresh list of every step (all steps are sequential in Public Preview)."""
        return [*self.steps]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class SagaDSLParser:
    """
    Parses saga definitions from plain dicts.

    Public Preview: ``parse`` does not read fan-out groups (the resulting
    definition always has an empty ``fan_outs`` list); ``_parse_fan_out`` is
    retained for API compatibility only.
    """

    def __init__(self, *, schema_validation: bool = False) -> None:
        """Create a parser.

        Args:
            schema_validation: When true, a ``SagaSchemaValidator`` is created
                and applied to every definition before parsing.
        """
        self._schema_validator = SagaSchemaValidator() if schema_validation else None

    def parse(self, definition: dict[str, Any]) -> SagaDefinition:
        """Parse a saga definition dict into a SagaDefinition.

        If schema_validation was enabled at construction, the definition is
        first passed to the schema validator's ``validate_or_raise``.

        Args:
            definition: Mapping with ``name``, ``session_id`` and ``steps``,
                plus optional ``saga_id`` and ``metadata``.

        Returns:
            The parsed definition. A missing or empty ``saga_id`` is replaced
            by a generated one; ``metadata`` given as a dict is shallow-copied
            so the result does not alias the input.

        Raises:
            SagaDSLError: If a required field is missing or empty, ``steps``
                is not a list, a step is not a dict, or a step ID repeats.
        """
        if self._schema_validator is not None:
            self._schema_validator.validate_or_raise(definition)

        name = definition.get("name", "")
        if not name:
            raise SagaDSLError("Saga definition must have a 'name'")

        session_id = definition.get("session_id", "")
        if not session_id:
            raise SagaDSLError("Saga definition must have a 'session_id'")

        raw_steps = definition.get("steps", [])
        if not raw_steps:
            raise SagaDSLError("Saga must have at least one step")
        # A dict or string here would otherwise be iterated key-by-key or
        # char-by-char and fail later with an unrelated AttributeError.
        if not isinstance(raw_steps, (list, tuple)):
            raise SagaDSLError("Saga 'steps' must be a list of step definitions")

        steps: list[SagaDSLStep] = []
        seen_ids: set[str] = set()
        for raw in raw_steps:
            step = self._parse_step(raw)
            if step.id in seen_ids:
                raise SagaDSLError(f"Duplicate step ID: {step.id}")
            seen_ids.add(step.id)
            steps.append(step)

        # An explicit null in the input yields an empty dict; a dict is copied.
        metadata = definition.get("metadata")
        if metadata is None:
            metadata = {}
        elif isinstance(metadata, dict):
            metadata = dict(metadata)

        return SagaDefinition(
            name=name,
            session_id=session_id,
            # "or" (not a .get default) so an explicit None/"" also gets an ID.
            saga_id=definition.get("saga_id") or f"saga:{uuid.uuid4().hex[:8]}",
            steps=steps,
            fan_outs=[],
            metadata=metadata,
        )

    def _parse_step(self, raw: dict) -> SagaDSLStep:
        """Build a SagaDSLStep from one raw step dict.

        Raises:
            SagaDSLError: If ``raw`` is not a dict or lacks a non-empty
                ``id``, ``action_id`` or ``agent``.
        """
        if not isinstance(raw, dict):
            raise SagaDSLError(
                f"Each step must be a mapping, got {type(raw).__name__}"
            )

        step_id = raw.get("id", "")
        if not step_id:
            raise SagaDSLError("Each step must have an 'id'")

        action_id = raw.get("action_id", "")
        if not action_id:
            raise SagaDSLError(f"Step {step_id} must have an 'action_id'")

        agent = raw.get("agent", "")
        if not agent:
            raise SagaDSLError(f"Step {step_id} must have an 'agent'")

        return SagaDSLStep(
            id=step_id,
            action_id=action_id,
            agent=agent,
            execute_api=raw.get("execute_api", ""),
            undo_api=raw.get("undo_api"),
            timeout=raw.get("timeout", 300),
            retries=raw.get("retries", 0),
            checkpoint_goal=raw.get("checkpoint_goal"),
        )

    def _parse_fan_out(self, raw: dict, valid_step_ids: set[str]) -> SagaDSLFanOut:
        """Parse fan-out definition (Public Preview: retained for API compat).

        ``valid_step_ids`` and any policy in ``raw`` are ignored; the policy is
        always ALL_MUST_SUCCEED and only ``raw["branches"]`` is read.
        """
        return SagaDSLFanOut(
            policy=FanOutPolicy.ALL_MUST_SUCCEED,
            branch_step_ids=raw.get("branches", []),
        )

    def to_saga_steps(self, definition: SagaDefinition) -> list[SagaStep]:
        """Convert a SagaDefinition into SagaStep objects, one per DSL step."""
        return [
            SagaStep(
                step_id=s.id,
                action_id=s.action_id,
                agent_did=s.agent,
                execute_api=s.execute_api,
                undo_api=s.undo_api,
                timeout_seconds=s.timeout,
                max_retries=s.retries,
            )
            for s in definition.steps
        ]

    def validate(self, definition: dict[str, Any]) -> list[str]:
        """Validate a definition and return list of errors (empty = valid).

        Unlike ``parse`` this collects every problem it finds instead of
        raising on the first one. Malformed ``steps`` (not a list, or an entry
        that is not a dict) are reported as errors rather than raising.
        """
        errors: list[str] = []
        if not definition.get("name"):
            errors.append("Missing 'name'")
        if not definition.get("session_id"):
            errors.append("Missing 'session_id'")

        raw_steps = definition.get("steps")
        if not raw_steps:
            errors.append("Missing 'steps'")
            return errors
        if not isinstance(raw_steps, (list, tuple)):
            errors.append("'steps' must be a list")
            return errors

        seen_ids = set()
        for i, step in enumerate(raw_steps):
            if not isinstance(step, dict):
                errors.append(f"Step {i} must be a mapping")
                continue
            step_id = step.get("id")
            if not step_id:
                errors.append(f"Step {i} missing 'id'")
            elif step_id in seen_ids:
                errors.append(f"Duplicate step ID: {step_id}")
            else:
                seen_ids.add(step_id)
            # Fall back to the positional index when the step has no 'id' key.
            if not step.get("action_id"):
                errors.append(f"Step {step.get('id', i)} missing 'action_id'")
            if not step.get("agent"):
                errors.append(f"Step {step.get('id', i)} missing 'agent'")
        return errors
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class SagaDSLError(Exception):
    """Error type used to report an invalid saga DSL definition."""
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
# Public Preview — basic implementation
|
|
4
|
+
"""
|
|
5
|
+
Parallel Saga Fan-Out — stub implementation.
|
|
6
|
+
|
|
7
|
+
Public Preview: only sequential ALL_MUST_SUCCEED execution.
|
|
8
|
+
Fan-out groups execute branches one at a time.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
import uuid
|
|
15
|
+
from collections.abc import Callable
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from enum import Enum
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
from hypervisor.saga.state_machine import SagaStep, StepState
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class FanOutPolicy(str, Enum):
    """Success policies a fan-out group can declare.

    Members are also ``str`` instances, so they compare equal to their values.
    """

    # Every branch has to succeed.
    ALL_MUST_SUCCEED = "all_must_succeed"
    # Policy value for "a majority of branches".
    MAJORITY_MUST_SUCCEED = "majority_must_succeed"
    # Policy value for "at least one branch".
    ANY_MUST_SUCCEED = "any_must_succeed"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class FanOutBranch:
    """One branch of a fan-out group together with its execution outcome."""

    # "branch:" followed by the first 8 hex characters of a random UUID4.
    branch_id: str = field(default_factory=lambda: f"branch:{uuid.uuid4().hex[:8]}")
    # Step this branch runs; None until one is assigned.
    step: SagaStep | None = None
    # Outcome fields: the returned value, an error description, and a success flag.
    result: Any = None
    error: str | None = None
    succeeded: bool = False
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class FanOutGroup:
    """A set of fan-out branches that are resolved together under one policy."""

    # "fanout:" followed by the first 8 hex characters of a random UUID4.
    group_id: str = field(default_factory=lambda: f"fanout:{uuid.uuid4().hex[:8]}")
    saga_id: str = ""
    policy: FanOutPolicy = FanOutPolicy.ALL_MUST_SUCCEED
    branches: list[FanOutBranch] = field(default_factory=list)
    # Set once the group has been executed, and whether the policy held.
    resolved: bool = False
    policy_satisfied: bool = False
    # Step IDs of branches that succeeded but must be undone.
    compensation_needed: list[str] = field(default_factory=list)

    @property
    def success_count(self) -> int:
        """Number of branches flagged as succeeded."""
        return sum(1 for b in self.branches if b.succeeded)

    @property
    def failure_count(self) -> int:
        """Number of branches that did not succeed and have an error recorded.

        Branches with no error recorded (``error is None``) are not counted.
        The check is ``is not None`` rather than truthiness because an error
        string can legitimately be empty, e.g. ``str(TimeoutError())``.
        """
        return sum(
            1 for b in self.branches if not b.succeeded and b.error is not None
        )

    @property
    def total_branches(self) -> int:
        """Number of branches in the group."""
        return len(self.branches)

    def check_policy(self) -> bool:
        """Public Preview: only ALL_MUST_SUCCEED is enforced.

        Returns True when every branch succeeded, regardless of ``policy``.
        A group with no branches therefore also returns True.
        """
        return self.success_count == self.total_branches
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class FanOutOrchestrator:
    """Fan-out stub (Public Preview: sequential execution, ALL_MUST_SUCCEED only)."""

    def __init__(self) -> None:
        # group_id -> group, for every group created through this orchestrator.
        self._groups: dict[str, FanOutGroup] = {}

    def create_group(
        self,
        saga_id: str,
        policy: FanOutPolicy = FanOutPolicy.ALL_MUST_SUCCEED,
    ) -> FanOutGroup:
        """Create and register an empty fan-out group for ``saga_id``.

        ``policy`` is accepted for API compatibility but ignored: the group is
        always created with ALL_MUST_SUCCEED.
        """
        group = FanOutGroup(saga_id=saga_id, policy=FanOutPolicy.ALL_MUST_SUCCEED)
        self._groups[group.group_id] = group
        return group

    def add_branch(self, group_id: str, step: SagaStep) -> FanOutBranch:
        """Append a new branch running ``step`` to the group and return it.

        Raises:
            ValueError: If ``group_id`` is unknown.
        """
        group = self._get_group(group_id)
        branch = FanOutBranch(step=step)
        group.branches.append(branch)
        return branch

    async def execute(
        self,
        group_id: str,
        executors: dict[str, Callable[..., Any]],
        timeout_seconds: int = 300,
    ) -> FanOutGroup:
        """Execute branches sequentially (Public Preview).

        Args:
            group_id: Group to run.
            executors: Maps step IDs to zero-argument callables whose result
                is awaited.
            timeout_seconds: Currently unused; each branch is bounded by its
                own step's ``timeout_seconds``.

        Returns:
            The group, marked resolved, with ``policy_satisfied`` set and, when
            the policy failed, ``compensation_needed`` listing the step IDs of
            the branches that succeeded.

        Raises:
            ValueError: If ``group_id`` is unknown.
        """
        group = self._get_group(group_id)

        for branch in group.branches:
            # Branches that cannot run are marked with an error and skipped;
            # later branches are still attempted.
            if not branch.step:
                branch.error = "No step assigned"
                continue
            executor = executors.get(branch.step.step_id)
            if not executor:
                branch.error = f"No executor for step {branch.step.step_id}"
                continue
            try:
                branch.step.transition(StepState.EXECUTING)
                result = await asyncio.wait_for(
                    executor(), timeout=branch.step.timeout_seconds
                )
                branch.result = result
                branch.succeeded = True
                branch.step.execute_result = result
                branch.step.transition(StepState.COMMITTED)
            except Exception as e:
                # The timeout raised by asyncio.wait_for has an empty str();
                # fall back to the exception type so the failure is never
                # recorded as a blank error.
                message = str(e) or type(e).__name__
                branch.error = message
                branch.step.error = message
                branch.step.transition(StepState.FAILED)
                break  # ALL_MUST_SUCCEED: stop on first failure

        group.policy_satisfied = group.check_policy()
        group.resolved = True
        if not group.policy_satisfied:
            group.compensation_needed = [
                b.step.step_id for b in group.branches if b.succeeded and b.step
            ]
        return group

    def get_group(self, group_id: str) -> FanOutGroup | None:
        """Return the group with this ID, or None if it does not exist."""
        return self._groups.get(group_id)

    def _get_group(self, group_id: str) -> FanOutGroup:
        """Return the group with this ID, raising ValueError if it does not exist."""
        group = self._groups.get(group_id)
        if not group:
            raise ValueError(f"Fan-out group {group_id} not found")
        return group

    @property
    def active_groups(self) -> list[FanOutGroup]:
        """Groups that have not been resolved yet."""
        return [g for g in self._groups.values() if not g.resolved]
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
# Public Preview — basic implementation
|
|
4
|
+
"""
|
|
5
|
+
Semantic Saga Orchestrator
|
|
6
|
+
|
|
7
|
+
Sequential step execution with reverse-order compensation on failure.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
import uuid
|
|
14
|
+
from collections.abc import Callable
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from hypervisor.constants import (
|
|
18
|
+
SAGA_DEFAULT_MAX_RETRIES,
|
|
19
|
+
SAGA_DEFAULT_RETRY_DELAY_SECONDS,
|
|
20
|
+
SAGA_DEFAULT_STEP_TIMEOUT_SECONDS,
|
|
21
|
+
)
|
|
22
|
+
from hypervisor.saga.state_machine import (
|
|
23
|
+
Saga,
|
|
24
|
+
SagaState,
|
|
25
|
+
SagaStateError,
|
|
26
|
+
SagaStep,
|
|
27
|
+
StepState,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SagaTimeoutError(Exception):
    """Error signalling that a saga step ran longer than its timeout allows."""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class SagaOrchestrator:
    """
    Orchestrates multi-step agent transactions with saga semantics.

    Steps are executed one at a time through ``execute_step``. ``compensate``
    walks the saga's committed steps in reverse order and invokes the supplied
    compensator for each step that declares an Undo_API. If any step cannot be
    compensated the saga is escalated and its error records that the Joint
    Liability penalty is triggered.
    """

    DEFAULT_MAX_RETRIES = SAGA_DEFAULT_MAX_RETRIES
    DEFAULT_RETRY_DELAY_SECONDS = SAGA_DEFAULT_RETRY_DELAY_SECONDS

    def __init__(self) -> None:
        # saga_id -> saga, for every saga created through this orchestrator.
        self._sagas: dict[str, Saga] = {}

    def create_saga(self, session_id: str) -> Saga:
        """Create, register and return a new saga for a session."""
        saga = Saga(
            saga_id=f"saga:{uuid.uuid4()}",
            session_id=session_id,
        )
        self._sagas[saga.saga_id] = saga
        return saga

    def add_step(
        self,
        saga_id: str,
        action_id: str,
        agent_did: str,
        execute_api: str,
        undo_api: str | None = None,
        timeout_seconds: int = SAGA_DEFAULT_STEP_TIMEOUT_SECONDS,
        max_retries: int = 0,
    ) -> SagaStep:
        """Append a new step to a saga and return it.

        Raises:
            SagaStateError: If ``saga_id`` is unknown.
        """
        saga = self._get_saga(saga_id)
        step = SagaStep(
            step_id=f"step:{uuid.uuid4()}",
            action_id=action_id,
            agent_did=agent_did,
            execute_api=execute_api,
            undo_api=undo_api,
            timeout_seconds=timeout_seconds,
            max_retries=max_retries,
        )
        saga.steps.append(step)
        return step

    async def execute_step(
        self,
        saga_id: str,
        step_id: str,
        executor: Callable[..., Any],
    ) -> Any:
        """
        Execute a single saga step with timeout and retry support.

        The executor is attempted ``1 + max_retries`` times (at least once),
        sleeping ``DEFAULT_RETRY_DELAY_SECONDS * attempt_number`` between
        attempts.

        Args:
            saga_id: Saga identifier
            step_id: Step identifier
            executor: Zero-argument callable returning an awaitable that
                performs the action

        Returns:
            Result from the executor

        Raises:
            SagaStateError: If the saga or step is unknown (state-transition
                errors from the step also propagate)
            SagaTimeoutError: If the final attempt exceeds the step timeout
            Exception: Whatever the executor raised on the final attempt
        """
        saga = self._get_saga(saga_id)
        step = self._get_step(saga, step_id)

        last_error: Exception | None = None
        # Clamp at zero retries: a negative max_retries would otherwise produce
        # no attempts at all and fall through to a misleading state error.
        attempts = 1 + max(0, step.max_retries)

        for attempt in range(attempts):
            step.retry_count = attempt
            step.transition(StepState.EXECUTING)
            try:
                result = await asyncio.wait_for(
                    executor(),
                    timeout=step.timeout_seconds,
                )
                step.execute_result = result
                step.transition(StepState.COMMITTED)
                return result
            except TimeoutError:
                last_error = SagaTimeoutError(
                    f"Step {step_id} timed out after {step.timeout_seconds}s "
                    f"(attempt {attempt + 1}/{attempts})"
                )
            except Exception as e:
                last_error = e
            await self._record_failed_attempt(step, last_error, attempt, attempts)

        # All retries exhausted
        if last_error:
            raise last_error
        raise SagaStateError("Step execution failed with no error captured")

    async def _record_failed_attempt(
        self,
        step: SagaStep,
        error: Exception,
        attempt: int,
        attempts: int,
    ) -> None:
        """Mark one attempt as failed and, if retries remain, reset and back off."""
        # Exceptions raised without a message stringify to ""; keep the type
        # name instead so the step never carries a blank error.
        step.error = str(error) or type(error).__name__
        step.transition(StepState.FAILED)
        if attempt < attempts - 1:
            # Reset to PENDING for retry (assigned directly, not via transition()).
            step.state = StepState.PENDING
            step.error = None
            await asyncio.sleep(self.DEFAULT_RETRY_DELAY_SECONDS * (attempt + 1))

    async def compensate(
        self,
        saga_id: str,
        compensator: Callable[[SagaStep], Any],
    ) -> list[SagaStep]:
        """
        Run compensation (rollback) for all committed steps in reverse order.

        The saga is moved to COMPENSATING first, then to ESCALATED if any step
        failed compensation or to COMPLETED otherwise.

        Args:
            saga_id: Saga identifier
            compensator: Async callable that takes a SagaStep and calls its Undo_API

        Returns:
            List of steps that failed compensation (empty = full success)

        Raises:
            SagaStateError: If ``saga_id`` is unknown
        """
        saga = self._get_saga(saga_id)
        saga.transition(SagaState.COMPENSATING)

        failed_compensations: list[SagaStep] = []

        for step in saga.committed_steps_reversed:
            if not step.undo_api:
                # Nothing to call: flag the step as failed outright (state is
                # assigned directly, not via transition()).
                step.state = StepState.COMPENSATION_FAILED
                step.error = "No Undo_API available"
                failed_compensations.append(step)
                continue

            step.transition(StepState.COMPENSATING)
            try:
                # Compensation is bounded by the same timeout as the step itself.
                result = await asyncio.wait_for(
                    compensator(step),
                    timeout=step.timeout_seconds,
                )
                step.compensation_result = result
                step.transition(StepState.COMPENSATED)
            except TimeoutError:
                step.error = f"Compensation timed out after {step.timeout_seconds}s"
                step.transition(StepState.COMPENSATION_FAILED)
                failed_compensations.append(step)
            except Exception as e:
                step.error = f"Compensation failed: {e}"
                step.transition(StepState.COMPENSATION_FAILED)
                failed_compensations.append(step)

        if failed_compensations:
            saga.transition(SagaState.ESCALATED)
            saga.error = (
                f"{len(failed_compensations)} step(s) failed compensation — "
                "Joint Liability penalty triggered"
            )
        else:
            saga.transition(SagaState.COMPLETED)

        return failed_compensations

    def get_saga(self, saga_id: str) -> Saga | None:
        """Get a saga by ID, or None if it does not exist."""
        return self._sagas.get(saga_id)

    @property
    def active_sagas(self) -> list[Saga]:
        """Get all sagas currently in the RUNNING or COMPENSATING state."""
        return [
            s for s in self._sagas.values()
            if s.state in (SagaState.RUNNING, SagaState.COMPENSATING)
        ]

    def _get_saga(self, saga_id: str) -> Saga:
        """Return the saga with this ID, raising SagaStateError if it is unknown."""
        saga = self._sagas.get(saga_id)
        if not saga:
            raise SagaStateError(f"Saga {saga_id} not found")
        return saga

    def _get_step(self, saga: Saga, step_id: str) -> SagaStep:
        """Return the step with this ID, raising SagaStateError if the saga lacks it."""
        for step in saga.steps:
            if step.step_id == step_id:
                return step
        raise SagaStateError(f"Step {step_id} not found in saga {saga.saga_id}")
|