agent_hypervisor 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_hypervisor-3.1.0.dist-info/METADATA +824 -0
- agent_hypervisor-3.1.0.dist-info/RECORD +60 -0
- agent_hypervisor-3.1.0.dist-info/WHEEL +4 -0
- agent_hypervisor-3.1.0.dist-info/entry_points.txt +2 -0
- agent_hypervisor-3.1.0.dist-info/licenses/LICENSE +21 -0
- hypervisor/__init__.py +160 -0
- hypervisor/api/__init__.py +7 -0
- hypervisor/api/models.py +285 -0
- hypervisor/api/server.py +742 -0
- hypervisor/audit/__init__.py +4 -0
- hypervisor/audit/commitment.py +76 -0
- hypervisor/audit/delta.py +135 -0
- hypervisor/audit/gc.py +99 -0
- hypervisor/cli/__init__.py +3 -0
- hypervisor/cli/formatters.py +99 -0
- hypervisor/cli/session_commands.py +200 -0
- hypervisor/constants.py +106 -0
- hypervisor/core.py +352 -0
- hypervisor/integrations/__init__.py +10 -0
- hypervisor/integrations/iatp_adapter.py +142 -0
- hypervisor/integrations/nexus_adapter.py +108 -0
- hypervisor/integrations/verification_adapter.py +122 -0
- hypervisor/liability/__init__.py +142 -0
- hypervisor/liability/attribution.py +86 -0
- hypervisor/liability/ledger.py +121 -0
- hypervisor/liability/quarantine.py +119 -0
- hypervisor/liability/slashing.py +80 -0
- hypervisor/liability/vouching.py +134 -0
- hypervisor/models.py +277 -0
- hypervisor/observability/__init__.py +27 -0
- hypervisor/observability/causal_trace.py +70 -0
- hypervisor/observability/event_bus.py +222 -0
- hypervisor/observability/prometheus_collector.py +248 -0
- hypervisor/observability/saga_span_exporter.py +341 -0
- hypervisor/providers.py +121 -0
- hypervisor/py.typed +0 -0
- hypervisor/reversibility/__init__.py +3 -0
- hypervisor/reversibility/registry.py +108 -0
- hypervisor/rings/__init__.py +21 -0
- hypervisor/rings/breach_detector.py +200 -0
- hypervisor/rings/classifier.py +78 -0
- hypervisor/rings/elevation.py +219 -0
- hypervisor/rings/enforcer.py +97 -0
- hypervisor/saga/__init__.py +22 -0
- hypervisor/saga/checkpoint.py +110 -0
- hypervisor/saga/dsl.py +190 -0
- hypervisor/saga/fan_out.py +126 -0
- hypervisor/saga/orchestrator.py +229 -0
- hypervisor/saga/schema.py +244 -0
- hypervisor/saga/state_machine.py +157 -0
- hypervisor/security/__init__.py +13 -0
- hypervisor/security/kill_switch.py +200 -0
- hypervisor/security/rate_limiter.py +190 -0
- hypervisor/session/__init__.py +194 -0
- hypervisor/session/intent_locks.py +118 -0
- hypervisor/session/isolation.py +37 -0
- hypervisor/session/sso.py +169 -0
- hypervisor/session/vector_clock.py +118 -0
- hypervisor/verification/__init__.py +3 -0
- hypervisor/verification/history.py +173 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
"""
|
|
4
|
+
Structured event bus for the Agent Hypervisor.
|
|
5
|
+
|
|
6
|
+
Every ring transition, liability event, saga step, session write, and
|
|
7
|
+
security action emits a typed event to an append-only store. Enables
|
|
8
|
+
full replay debugging, post-mortem analysis, and real-time monitoring.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import uuid
|
|
14
|
+
from collections.abc import Callable
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from datetime import UTC, datetime
|
|
17
|
+
from enum import Enum
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class EventType(str, Enum):
    """Closed set of event kinds the hypervisor can emit.

    Every member is also a ``str`` (via the ``str`` mixin), so a member
    compares equal to its dotted ``"<category>.<action>"`` value and can be
    looked up from that value with ``EventType("ring.elevated")``.
    Members are grouped by the subsystem named in the value's prefix.
    """

    # --- session.* : session lifecycle ---------------------------------
    SESSION_CREATED = "session.created"
    SESSION_JOINED = "session.joined"
    SESSION_ACTIVATED = "session.activated"
    SESSION_TERMINATED = "session.terminated"
    SESSION_ARCHIVED = "session.archived"

    # --- ring.* : execution-ring transitions and breaches --------------
    RING_ASSIGNED = "ring.assigned"
    RING_ELEVATED = "ring.elevated"
    RING_DEMOTED = "ring.demoted"
    RING_ELEVATION_EXPIRED = "ring.elevation_expired"
    RING_BREACH_DETECTED = "ring.breach_detected"

    # --- liability.* : vouching, slashing, quarantine ------------------
    VOUCH_CREATED = "liability.vouch_created"
    VOUCH_RELEASED = "liability.vouch_released"
    SLASH_EXECUTED = "liability.slash_executed"
    FAULT_ATTRIBUTED = "liability.fault_attributed"
    QUARANTINE_ENTERED = "liability.quarantine_entered"
    QUARANTINE_RELEASED = "liability.quarantine_released"

    # --- saga.* : saga orchestration -----------------------------------
    SAGA_CREATED = "saga.created"
    SAGA_STEP_STARTED = "saga.step_started"
    SAGA_STEP_COMMITTED = "saga.step_committed"
    SAGA_STEP_FAILED = "saga.step_failed"
    SAGA_COMPENSATING = "saga.compensating"
    SAGA_COMPLETED = "saga.completed"
    SAGA_ESCALATED = "saga.escalated"
    SAGA_FANOUT_STARTED = "saga.fanout_started"
    SAGA_FANOUT_RESOLVED = "saga.fanout_resolved"
    SAGA_CHECKPOINT_SAVED = "saga.checkpoint_saved"

    # --- vfs.* : VFS / session writes ----------------------------------
    VFS_WRITE = "vfs.write"
    VFS_DELETE = "vfs.delete"
    VFS_SNAPSHOT = "vfs.snapshot"
    VFS_RESTORE = "vfs.restore"
    VFS_CONFLICT = "vfs.conflict"

    # --- security.* : security actions ---------------------------------
    RATE_LIMITED = "security.rate_limited"
    AGENT_KILLED = "security.agent_killed"
    SAGA_HANDOFF = "security.saga_handoff"
    IDENTITY_VERIFIED = "security.identity_verified"

    # --- audit.* : audit trail -----------------------------------------
    AUDIT_DELTA_CAPTURED = "audit.delta_captured"
    AUDIT_COMMITTED = "audit.committed"
    AUDIT_GC_COLLECTED = "audit.gc_collected"

    # --- verification.* : behaviour / history verification -------------
    BEHAVIOR_DRIFT = "verification.behavior_drift"
    HISTORY_VERIFIED = "verification.history_verified"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@dataclass(frozen=True)
class HypervisorEvent:
    """A structured event emitted by the hypervisor.

    The dataclass is frozen, so fields cannot be rebound after construction.
    Note that ``payload`` is a plain ``dict`` and is therefore still mutable
    in place; ``to_dict`` hands out a copy so serialized views cannot be used
    to alter the stored event by accident.
    """

    # Random identifier: the first 16 hex characters of a UUID4.
    event_id: str = field(default_factory=lambda: uuid.uuid4().hex[:16])
    event_type: EventType = EventType.SESSION_CREATED
    # Timezone-aware creation time (UTC).
    timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
    session_id: str | None = None
    agent_did: str | None = None
    causal_trace_id: str | None = None
    parent_event_id: str | None = None
    # Free-form, event-specific data; a fresh dict per instance.
    payload: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the event as a plain dictionary.

        ``event_type`` is rendered as its string value and ``timestamp`` as
        an ISO-8601 string. ``payload`` is shallow-copied so that adding or
        removing keys on the returned dictionary does not change this event.

        Returns:
            A new dictionary with one entry per field.
        """
        return {
            "event_id": self.event_id,
            "event_type": self.event_type.value,
            "timestamp": self.timestamp.isoformat(),
            "session_id": self.session_id,
            "agent_did": self.agent_did,
            "causal_trace_id": self.causal_trace_id,
            "parent_event_id": self.parent_event_id,
            # Copy: the frozen dataclass does not protect the dict itself.
            "payload": dict(self.payload),
        }
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# Type alias for event subscribers
|
|
108
|
+
# A handler is called synchronously by HypervisorEventBus.emit with the
# emitted event as its only argument; its return value is discarded.
EventHandler = Callable[[HypervisorEvent], None]
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class HypervisorEventBus:
|
|
112
|
+
"""
|
|
113
|
+
Append-only structured event store with pub/sub.
|
|
114
|
+
|
|
115
|
+
All hypervisor components emit events here. Supports:
|
|
116
|
+
- Append-only storage (immutable event log)
|
|
117
|
+
- Query by type, agent, session, time range
|
|
118
|
+
- Subscribe to specific event types
|
|
119
|
+
- Event count and statistics
|
|
120
|
+
"""
|
|
121
|
+
|
|
122
|
+
def __init__(self) -> None:
|
|
123
|
+
self._events: list[HypervisorEvent] = []
|
|
124
|
+
self._subscribers: dict[EventType | None, list[EventHandler]] = {}
|
|
125
|
+
self._by_type: dict[EventType, list[HypervisorEvent]] = {}
|
|
126
|
+
self._by_session: dict[str, list[HypervisorEvent]] = {}
|
|
127
|
+
self._by_agent: dict[str, list[HypervisorEvent]] = {}
|
|
128
|
+
|
|
129
|
+
def emit(self, event: HypervisorEvent) -> None:
|
|
130
|
+
"""Append an event and notify subscribers."""
|
|
131
|
+
self._events.append(event)
|
|
132
|
+
|
|
133
|
+
# Index by type
|
|
134
|
+
self._by_type.setdefault(event.event_type, []).append(event)
|
|
135
|
+
|
|
136
|
+
# Index by session
|
|
137
|
+
if event.session_id:
|
|
138
|
+
self._by_session.setdefault(event.session_id, []).append(event)
|
|
139
|
+
|
|
140
|
+
# Index by agent
|
|
141
|
+
if event.agent_did:
|
|
142
|
+
self._by_agent.setdefault(event.agent_did, []).append(event)
|
|
143
|
+
|
|
144
|
+
# Notify type-specific subscribers
|
|
145
|
+
for handler in self._subscribers.get(event.event_type, []):
|
|
146
|
+
handler(event)
|
|
147
|
+
|
|
148
|
+
# Notify wildcard subscribers
|
|
149
|
+
for handler in self._subscribers.get(None, []):
|
|
150
|
+
handler(event)
|
|
151
|
+
|
|
152
|
+
def subscribe(
|
|
153
|
+
self,
|
|
154
|
+
event_type: EventType | None = None,
|
|
155
|
+
handler: EventHandler | None = None,
|
|
156
|
+
) -> None:
|
|
157
|
+
"""Subscribe to events. Use event_type=None for all events."""
|
|
158
|
+
if handler:
|
|
159
|
+
self._subscribers.setdefault(event_type, []).append(handler)
|
|
160
|
+
|
|
161
|
+
def query_by_type(self, event_type: EventType) -> list[HypervisorEvent]:
|
|
162
|
+
"""Get all events of a specific type."""
|
|
163
|
+
return list(self._by_type.get(event_type, []))
|
|
164
|
+
|
|
165
|
+
def query_by_session(self, session_id: str) -> list[HypervisorEvent]:
|
|
166
|
+
"""Get all events for a specific session."""
|
|
167
|
+
return list(self._by_session.get(session_id, []))
|
|
168
|
+
|
|
169
|
+
def query_by_agent(self, agent_did: str) -> list[HypervisorEvent]:
|
|
170
|
+
"""Get all events involving a specific agent."""
|
|
171
|
+
return list(self._by_agent.get(agent_did, []))
|
|
172
|
+
|
|
173
|
+
def query_by_time_range(
|
|
174
|
+
self,
|
|
175
|
+
start: datetime,
|
|
176
|
+
end: datetime | None = None,
|
|
177
|
+
) -> list[HypervisorEvent]:
|
|
178
|
+
"""Get events within a time range."""
|
|
179
|
+
if end is None:
|
|
180
|
+
end = datetime.now(UTC)
|
|
181
|
+
return [e for e in self._events if start <= e.timestamp <= end]
|
|
182
|
+
|
|
183
|
+
def query(
|
|
184
|
+
self,
|
|
185
|
+
event_type: EventType | None = None,
|
|
186
|
+
session_id: str | None = None,
|
|
187
|
+
agent_did: str | None = None,
|
|
188
|
+
limit: int | None = None,
|
|
189
|
+
) -> list[HypervisorEvent]:
|
|
190
|
+
"""Flexible query with multiple filters."""
|
|
191
|
+
results = self._events
|
|
192
|
+
|
|
193
|
+
if event_type is not None:
|
|
194
|
+
results = [e for e in results if e.event_type == event_type]
|
|
195
|
+
if session_id is not None:
|
|
196
|
+
results = [e for e in results if e.session_id == session_id]
|
|
197
|
+
if agent_did is not None:
|
|
198
|
+
results = [e for e in results if e.agent_did == agent_did]
|
|
199
|
+
|
|
200
|
+
if limit is not None:
|
|
201
|
+
results = results[-limit:]
|
|
202
|
+
|
|
203
|
+
return results
|
|
204
|
+
|
|
205
|
+
@property
|
|
206
|
+
def event_count(self) -> int:
|
|
207
|
+
return len(self._events)
|
|
208
|
+
|
|
209
|
+
@property
|
|
210
|
+
def all_events(self) -> list[HypervisorEvent]:
|
|
211
|
+
return list(self._events)
|
|
212
|
+
|
|
213
|
+
def type_counts(self) -> dict[str, int]:
|
|
214
|
+
"""Return count of events per type."""
|
|
215
|
+
return {t.value: len(evts) for t, evts in self._by_type.items()}
|
|
216
|
+
|
|
217
|
+
def clear(self) -> None:
|
|
218
|
+
"""Clear all events (for testing)."""
|
|
219
|
+
self._events.clear()
|
|
220
|
+
self._by_type.clear()
|
|
221
|
+
self._by_session.clear()
|
|
222
|
+
self._by_agent.clear()
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
"""
|
|
4
|
+
Prometheus metrics collector for Hypervisor ring transitions.
|
|
5
|
+
|
|
6
|
+
Subscribes to the HypervisorEventBus and maintains in-memory counters
|
|
7
|
+
for ring-related events (transitions, breaches, elevations). Metrics
|
|
8
|
+
follow the ``agent_hypervisor_ring_*`` prefix convention.
|
|
9
|
+
|
|
10
|
+
No external dependencies — works standalone or exports to the
|
|
11
|
+
``agent-sre`` PrometheusExporter via dependency injection.
|
|
12
|
+
|
|
13
|
+
Usage::
|
|
14
|
+
|
|
15
|
+
from hypervisor.observability import RingMetricsCollector, HypervisorEventBus
|
|
16
|
+
|
|
17
|
+
bus = HypervisorEventBus()
|
|
18
|
+
collector = RingMetricsCollector(bus)
|
|
19
|
+
|
|
20
|
+
# ... hypervisor operates, ring events flow through the bus ...
|
|
21
|
+
|
|
22
|
+
snapshot = collector.collect()
|
|
23
|
+
# {"agent_hypervisor_ring_transitions_total": {("ring.assigned", "did:agent-1", "session-1"): 5, ...}, ...}
|
|
24
|
+
|
|
25
|
+
# Optional: export to agent-sre PrometheusExporter
|
|
26
|
+
from agent_sre.integrations.prometheus import PrometheusExporter
|
|
27
|
+
prom = PrometheusExporter()
|
|
28
|
+
collector.export_to_prometheus(prom)
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from __future__ import annotations
|
|
32
|
+
|
|
33
|
+
import time
|
|
34
|
+
from collections import defaultdict
|
|
35
|
+
from typing import Any, Protocol
|
|
36
|
+
|
|
37
|
+
from hypervisor.observability.event_bus import EventType, HypervisorEvent, HypervisorEventBus
|
|
38
|
+
|
|
39
|
+
# Event types counted as ring transitions (assignment, elevation, demotion,
# and expiry of an elevation).
_RING_TRANSITION_EVENTS = frozenset(
    (
        EventType.RING_ASSIGNED,
        EventType.RING_ELEVATED,
        EventType.RING_DEMOTED,
        EventType.RING_ELEVATION_EXPIRED,
    )
)

# Event types counted as ring breaches.
_RING_BREACH_EVENTS = frozenset((EventType.RING_BREACH_DETECTED,))

# Every ring-related event type: the union of the two sets above.
_ALL_RING_EVENTS = _RING_TRANSITION_EVENTS.union(_RING_BREACH_EVENTS)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class PrometheusExporterProtocol(Protocol):
    """Structural interface for Prometheus exporters.

    Declared as a ``Protocol`` so that any object providing these two
    methods can be used, which avoids a hard dependency on ``agent-sre``.
    """

    def set_gauge(
        self,
        name: str,
        value: float,
        labels: dict[str, str] | None = None,
        help_text: str = "",
    ) -> None:
        """Record ``value`` for the gauge ``name`` with optional labels."""
        ...

    def inc_counter(
        self,
        name: str,
        value: float = 1.0,
        labels: dict[str, str] | None = None,
        help_text: str = "",
    ) -> None:
        """Increment the counter ``name`` by ``value`` with optional labels."""
        ...
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
# Metric name constants
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
|
|
74
|
+
# Counter: ring transition events, labelled by event type, agent and session.
METRIC_RING_TRANSITIONS_TOTAL = "agent_hypervisor_ring_transitions_total"

# Counter: ring breach events, labelled by agent and session.
METRIC_RING_BREACHES_TOTAL = "agent_hypervisor_ring_breaches_total"

# Gauge: the ring each agent is currently in.
METRIC_RING_CURRENT = "agent_hypervisor_ring_current"

# Gauge: duration of an agent's ring elevation, in seconds.
METRIC_RING_ELEVATION_DURATION = "agent_hypervisor_ring_elevation_duration_seconds"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class RingMetricsCollector:
    """Collects Prometheus-compatible metrics from hypervisor ring events.

    Subscribes to the ``HypervisorEventBus`` for ring transition and breach
    events. Maintains in-memory counters and gauges that can be exported
    to any :class:`PrometheusExporterProtocol`-compatible exporter.

    Events without an agent DID or session id are accounted under the
    label value ``"unknown"``.

    Attributes:
        _bus: The event bus this collector is subscribed to.
        _transition_counts: Counter per ``(event_type, agent_did, session_id)``.
        _breach_counts: Counter per ``(agent_did, session_id)``.
        _current_rings: Current ring gauge per ``agent_did``.
        _elevation_start: Timestamp when an agent entered elevated state.
        _elevation_durations: Last known elevation duration per ``agent_did``.
        _events_processed: Number of ring events handled so far.
    """

    def __init__(self, bus: HypervisorEventBus) -> None:
        """Create the collector and subscribe it to every ring event type.

        Args:
            bus: The event bus to listen on.
        """
        self._bus = bus

        # Counters: (event_type_value, agent_did, session_id) -> count
        self._transition_counts: dict[tuple[str, str, str], int] = defaultdict(int)

        # Breach counter: (agent_did, session_id) -> count
        self._breach_counts: dict[tuple[str, str], int] = defaultdict(int)

        # Current ring per agent: agent_did -> ring_value (int)
        self._current_rings: dict[str, int] = {}

        # Elevation tracking: agent_did -> start timestamp (epoch seconds)
        self._elevation_start: dict[str, float] = {}

        # Last elevation duration: agent_did -> seconds (float)
        self._elevation_durations: dict[str, float] = {}

        # Total events processed
        self._events_processed: int = 0

        # Subscribe to all ring events
        for event_type in _ALL_RING_EVENTS:
            bus.subscribe(event_type=event_type, handler=self._handle_event)

    def _handle_event(self, event: HypervisorEvent) -> None:
        """Update counters and gauges from one ring-related event.

        Args:
            event: An event delivered by the bus.

        Raises:
            TypeError, ValueError: If the payload's ring value is neither an
                ``int`` nor convertible with ``int()``.
        """
        agent_did = event.agent_did or "unknown"
        session_id = event.session_id or "unknown"
        self._events_processed += 1

        if event.event_type in _RING_TRANSITION_EVENTS:
            key = (event.event_type.value, agent_did, session_id)
            self._transition_counts[key] += 1

            # Track current ring from payload. ``to_ring`` takes precedence
            # over ``ring``. The checks are explicitly against ``None``
            # because ring 0 is a valid ring but is falsy, so an ``or``
            # fallback would silently drop every move into ring 0.
            to_ring = event.payload.get("to_ring")
            if to_ring is None:
                to_ring = event.payload.get("ring")
            if to_ring is not None:
                self._current_rings[agent_did] = (
                    to_ring if isinstance(to_ring, int) else int(to_ring)
                )

            # Track elevation timing
            if event.event_type == EventType.RING_ELEVATED:
                self._elevation_start[agent_did] = event.timestamp.timestamp()
            elif event.event_type in (
                EventType.RING_DEMOTED,
                EventType.RING_ELEVATION_EXPIRED,
            ):
                # The elevation (if one was being tracked) ends here.
                start = self._elevation_start.pop(agent_did, None)
                if start is not None:
                    self._elevation_durations[agent_did] = (
                        event.timestamp.timestamp() - start
                    )

        elif event.event_type in _RING_BREACH_EVENTS:
            self._breach_counts[(agent_did, session_id)] += 1

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def collect(self) -> dict[str, Any]:
        """Return a snapshot of all metrics as a framework-agnostic dict.

        Each nested dictionary is a copy, so the snapshot does not change
        when further events arrive.

        Returns:
            Dictionary with metric names as keys and nested dicts/values:

            - ``agent_hypervisor_ring_transitions_total``:
              ``{(event_type, agent, session): count}``
            - ``agent_hypervisor_ring_breaches_total``:
              ``{(agent, session): count}``
            - ``agent_hypervisor_ring_current``:
              ``{agent: ring_value}``
            - ``agent_hypervisor_ring_elevation_duration_seconds``:
              ``{agent: seconds}``
            - ``events_processed``: total number of ring events handled
        """
        return {
            METRIC_RING_TRANSITIONS_TOTAL: dict(self._transition_counts),
            METRIC_RING_BREACHES_TOTAL: dict(self._breach_counts),
            METRIC_RING_CURRENT: dict(self._current_rings),
            METRIC_RING_ELEVATION_DURATION: dict(self._elevation_durations),
            "events_processed": self._events_processed,
        }

    def export_to_prometheus(self, exporter: PrometheusExporterProtocol) -> None:
        """Write all current metrics into a Prometheus-compatible exporter.

        Args:
            exporter: Any object implementing :class:`PrometheusExporterProtocol`
                (e.g. ``agent_sre.integrations.prometheus.PrometheusExporter``).
        """
        # NOTE(review): both counter loops pass the cumulative total to
        # ``inc_counter`` on every call. If the exporter accumulates
        # increments, exporting repeatedly to the same exporter would count
        # earlier events again - confirm the intended calling pattern.

        # Transition counters
        for (event_type, agent_did, session_id), count in self._transition_counts.items():
            exporter.inc_counter(
                METRIC_RING_TRANSITIONS_TOTAL,
                float(count),
                labels={
                    "event_type": event_type,
                    "agent_did": agent_did,
                    "session_id": session_id,
                },
                help_text="Total ring transition events by type",
            )

        # Breach counters
        for (agent_did, session_id), count in self._breach_counts.items():
            exporter.inc_counter(
                METRIC_RING_BREACHES_TOTAL,
                float(count),
                labels={
                    "agent_did": agent_did,
                    "session_id": session_id,
                },
                help_text="Total ring breach events detected",
            )

        # Current ring gauge
        for agent_did, ring_value in self._current_rings.items():
            exporter.set_gauge(
                METRIC_RING_CURRENT,
                float(ring_value),
                labels={"agent_did": agent_did},
                help_text="Current execution ring for each agent (0=root, 3=sandbox)",
            )

        # Elevation duration gauge (completed elevations)
        for agent_did, duration in self._elevation_durations.items():
            exporter.set_gauge(
                METRIC_RING_ELEVATION_DURATION,
                duration,
                labels={"agent_did": agent_did},
                help_text="Duration of the last ring elevation in seconds",
            )

        # Also export any currently-active elevations. These use the same
        # metric name and label, so for an agent that is elevated right now
        # the running duration is written after its last completed one.
        now = time.time()
        for agent_did, start_ts in self._elevation_start.items():
            exporter.set_gauge(
                METRIC_RING_ELEVATION_DURATION,
                now - start_ts,
                labels={"agent_did": agent_did},
                help_text="Duration of the last ring elevation in seconds",
            )

    def reset(self) -> None:
        """Reset all counters and gauges (for testing).

        The bus subscriptions made in ``__init__`` stay in place.
        """
        self._transition_counts.clear()
        self._breach_counts.clear()
        self._current_rings.clear()
        self._elevation_start.clear()
        self._elevation_durations.clear()
        self._events_processed = 0
|