mcp-hangar 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_hangar/__init__.py +139 -0
- mcp_hangar/application/__init__.py +1 -0
- mcp_hangar/application/commands/__init__.py +67 -0
- mcp_hangar/application/commands/auth_commands.py +118 -0
- mcp_hangar/application/commands/auth_handlers.py +296 -0
- mcp_hangar/application/commands/commands.py +59 -0
- mcp_hangar/application/commands/handlers.py +189 -0
- mcp_hangar/application/discovery/__init__.py +21 -0
- mcp_hangar/application/discovery/discovery_metrics.py +283 -0
- mcp_hangar/application/discovery/discovery_orchestrator.py +497 -0
- mcp_hangar/application/discovery/lifecycle_manager.py +315 -0
- mcp_hangar/application/discovery/security_validator.py +414 -0
- mcp_hangar/application/event_handlers/__init__.py +50 -0
- mcp_hangar/application/event_handlers/alert_handler.py +191 -0
- mcp_hangar/application/event_handlers/audit_handler.py +203 -0
- mcp_hangar/application/event_handlers/knowledge_base_handler.py +120 -0
- mcp_hangar/application/event_handlers/logging_handler.py +69 -0
- mcp_hangar/application/event_handlers/metrics_handler.py +152 -0
- mcp_hangar/application/event_handlers/persistent_audit_store.py +217 -0
- mcp_hangar/application/event_handlers/security_handler.py +604 -0
- mcp_hangar/application/mcp/tooling.py +158 -0
- mcp_hangar/application/ports/__init__.py +9 -0
- mcp_hangar/application/ports/observability.py +237 -0
- mcp_hangar/application/queries/__init__.py +52 -0
- mcp_hangar/application/queries/auth_handlers.py +237 -0
- mcp_hangar/application/queries/auth_queries.py +118 -0
- mcp_hangar/application/queries/handlers.py +227 -0
- mcp_hangar/application/read_models/__init__.py +11 -0
- mcp_hangar/application/read_models/provider_views.py +139 -0
- mcp_hangar/application/sagas/__init__.py +11 -0
- mcp_hangar/application/sagas/group_rebalance_saga.py +137 -0
- mcp_hangar/application/sagas/provider_failover_saga.py +266 -0
- mcp_hangar/application/sagas/provider_recovery_saga.py +172 -0
- mcp_hangar/application/services/__init__.py +9 -0
- mcp_hangar/application/services/provider_service.py +208 -0
- mcp_hangar/application/services/traced_provider_service.py +211 -0
- mcp_hangar/bootstrap/runtime.py +328 -0
- mcp_hangar/context.py +178 -0
- mcp_hangar/domain/__init__.py +117 -0
- mcp_hangar/domain/contracts/__init__.py +57 -0
- mcp_hangar/domain/contracts/authentication.py +225 -0
- mcp_hangar/domain/contracts/authorization.py +229 -0
- mcp_hangar/domain/contracts/event_store.py +178 -0
- mcp_hangar/domain/contracts/metrics_publisher.py +59 -0
- mcp_hangar/domain/contracts/persistence.py +383 -0
- mcp_hangar/domain/contracts/provider_runtime.py +146 -0
- mcp_hangar/domain/discovery/__init__.py +20 -0
- mcp_hangar/domain/discovery/conflict_resolver.py +267 -0
- mcp_hangar/domain/discovery/discovered_provider.py +185 -0
- mcp_hangar/domain/discovery/discovery_service.py +412 -0
- mcp_hangar/domain/discovery/discovery_source.py +192 -0
- mcp_hangar/domain/events.py +433 -0
- mcp_hangar/domain/exceptions.py +525 -0
- mcp_hangar/domain/model/__init__.py +70 -0
- mcp_hangar/domain/model/aggregate.py +58 -0
- mcp_hangar/domain/model/circuit_breaker.py +152 -0
- mcp_hangar/domain/model/event_sourced_api_key.py +413 -0
- mcp_hangar/domain/model/event_sourced_provider.py +423 -0
- mcp_hangar/domain/model/event_sourced_role_assignment.py +268 -0
- mcp_hangar/domain/model/health_tracker.py +183 -0
- mcp_hangar/domain/model/load_balancer.py +185 -0
- mcp_hangar/domain/model/provider.py +810 -0
- mcp_hangar/domain/model/provider_group.py +656 -0
- mcp_hangar/domain/model/tool_catalog.py +105 -0
- mcp_hangar/domain/policies/__init__.py +19 -0
- mcp_hangar/domain/policies/provider_health.py +187 -0
- mcp_hangar/domain/repository.py +249 -0
- mcp_hangar/domain/security/__init__.py +85 -0
- mcp_hangar/domain/security/input_validator.py +710 -0
- mcp_hangar/domain/security/rate_limiter.py +387 -0
- mcp_hangar/domain/security/roles.py +237 -0
- mcp_hangar/domain/security/sanitizer.py +387 -0
- mcp_hangar/domain/security/secrets.py +501 -0
- mcp_hangar/domain/services/__init__.py +20 -0
- mcp_hangar/domain/services/audit_service.py +376 -0
- mcp_hangar/domain/services/image_builder.py +328 -0
- mcp_hangar/domain/services/provider_launcher.py +1046 -0
- mcp_hangar/domain/value_objects.py +1138 -0
- mcp_hangar/errors.py +818 -0
- mcp_hangar/fastmcp_server.py +1105 -0
- mcp_hangar/gc.py +134 -0
- mcp_hangar/infrastructure/__init__.py +79 -0
- mcp_hangar/infrastructure/async_executor.py +133 -0
- mcp_hangar/infrastructure/auth/__init__.py +37 -0
- mcp_hangar/infrastructure/auth/api_key_authenticator.py +388 -0
- mcp_hangar/infrastructure/auth/event_sourced_store.py +567 -0
- mcp_hangar/infrastructure/auth/jwt_authenticator.py +360 -0
- mcp_hangar/infrastructure/auth/middleware.py +340 -0
- mcp_hangar/infrastructure/auth/opa_authorizer.py +243 -0
- mcp_hangar/infrastructure/auth/postgres_store.py +659 -0
- mcp_hangar/infrastructure/auth/projections.py +366 -0
- mcp_hangar/infrastructure/auth/rate_limiter.py +311 -0
- mcp_hangar/infrastructure/auth/rbac_authorizer.py +323 -0
- mcp_hangar/infrastructure/auth/sqlite_store.py +624 -0
- mcp_hangar/infrastructure/command_bus.py +112 -0
- mcp_hangar/infrastructure/discovery/__init__.py +110 -0
- mcp_hangar/infrastructure/discovery/docker_source.py +289 -0
- mcp_hangar/infrastructure/discovery/entrypoint_source.py +249 -0
- mcp_hangar/infrastructure/discovery/filesystem_source.py +383 -0
- mcp_hangar/infrastructure/discovery/kubernetes_source.py +247 -0
- mcp_hangar/infrastructure/event_bus.py +260 -0
- mcp_hangar/infrastructure/event_sourced_repository.py +443 -0
- mcp_hangar/infrastructure/event_store.py +396 -0
- mcp_hangar/infrastructure/knowledge_base/__init__.py +259 -0
- mcp_hangar/infrastructure/knowledge_base/contracts.py +202 -0
- mcp_hangar/infrastructure/knowledge_base/memory.py +177 -0
- mcp_hangar/infrastructure/knowledge_base/postgres.py +545 -0
- mcp_hangar/infrastructure/knowledge_base/sqlite.py +513 -0
- mcp_hangar/infrastructure/metrics_publisher.py +36 -0
- mcp_hangar/infrastructure/observability/__init__.py +10 -0
- mcp_hangar/infrastructure/observability/langfuse_adapter.py +534 -0
- mcp_hangar/infrastructure/persistence/__init__.py +33 -0
- mcp_hangar/infrastructure/persistence/audit_repository.py +371 -0
- mcp_hangar/infrastructure/persistence/config_repository.py +398 -0
- mcp_hangar/infrastructure/persistence/database.py +333 -0
- mcp_hangar/infrastructure/persistence/database_common.py +330 -0
- mcp_hangar/infrastructure/persistence/event_serializer.py +280 -0
- mcp_hangar/infrastructure/persistence/event_upcaster.py +166 -0
- mcp_hangar/infrastructure/persistence/in_memory_event_store.py +150 -0
- mcp_hangar/infrastructure/persistence/recovery_service.py +312 -0
- mcp_hangar/infrastructure/persistence/sqlite_event_store.py +386 -0
- mcp_hangar/infrastructure/persistence/unit_of_work.py +409 -0
- mcp_hangar/infrastructure/persistence/upcasters/README.md +13 -0
- mcp_hangar/infrastructure/persistence/upcasters/__init__.py +7 -0
- mcp_hangar/infrastructure/query_bus.py +153 -0
- mcp_hangar/infrastructure/saga_manager.py +401 -0
- mcp_hangar/logging_config.py +209 -0
- mcp_hangar/metrics.py +1007 -0
- mcp_hangar/models.py +31 -0
- mcp_hangar/observability/__init__.py +54 -0
- mcp_hangar/observability/health.py +487 -0
- mcp_hangar/observability/metrics.py +319 -0
- mcp_hangar/observability/tracing.py +433 -0
- mcp_hangar/progress.py +542 -0
- mcp_hangar/retry.py +613 -0
- mcp_hangar/server/__init__.py +120 -0
- mcp_hangar/server/__main__.py +6 -0
- mcp_hangar/server/auth_bootstrap.py +340 -0
- mcp_hangar/server/auth_cli.py +335 -0
- mcp_hangar/server/auth_config.py +305 -0
- mcp_hangar/server/bootstrap.py +735 -0
- mcp_hangar/server/cli.py +161 -0
- mcp_hangar/server/config.py +224 -0
- mcp_hangar/server/context.py +215 -0
- mcp_hangar/server/http_auth_middleware.py +165 -0
- mcp_hangar/server/lifecycle.py +467 -0
- mcp_hangar/server/state.py +117 -0
- mcp_hangar/server/tools/__init__.py +16 -0
- mcp_hangar/server/tools/discovery.py +186 -0
- mcp_hangar/server/tools/groups.py +75 -0
- mcp_hangar/server/tools/health.py +301 -0
- mcp_hangar/server/tools/provider.py +939 -0
- mcp_hangar/server/tools/registry.py +320 -0
- mcp_hangar/server/validation.py +113 -0
- mcp_hangar/stdio_client.py +229 -0
- mcp_hangar-0.2.0.dist-info/METADATA +347 -0
- mcp_hangar-0.2.0.dist-info/RECORD +160 -0
- mcp_hangar-0.2.0.dist-info/WHEEL +4 -0
- mcp_hangar-0.2.0.dist-info/entry_points.txt +2 -0
- mcp_hangar-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
"""Extended observability metrics for MCP Hangar.
|
|
2
|
+
|
|
3
|
+
Adds metrics not covered by the base metrics module:
|
|
4
|
+
- Circuit breaker state
|
|
5
|
+
- Retry attempts
|
|
6
|
+
- Queue depths
|
|
7
|
+
- Resource usage (where available)
|
|
8
|
+
- Cold start detailed timing
|
|
9
|
+
|
|
10
|
+
These metrics complement mcp_hangar.metrics with observability-specific
|
|
11
|
+
measurements useful for dashboards and alerting.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from enum import Enum
|
|
16
|
+
import threading
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
from mcp_hangar.logging_config import get_logger
|
|
20
|
+
from mcp_hangar.metrics import Counter, Gauge, Histogram, REGISTRY
|
|
21
|
+
|
|
22
|
+
# Module-level logger, obtained through the project's logging_config helper.
logger = get_logger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CircuitState(Enum):
    """States a circuit breaker can be in.

    The string values are the keys that
    ``ObservabilityMetrics.set_circuit_state`` translates into the numeric
    gauge value (closed -> 0, open -> 1, half_open -> 2).
    """

    # Normal operation.
    CLOSED = "closed"

    # Failing, rejecting requests.
    OPEN = "open"

    # Testing if recovered.
    HALF_OPEN = "half_open"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class ColdStartTiming:
    """Per-phase timing breakdown of a single cold start.

    Every field defaults to ``0.0``. The ``_ms`` suffix suggests the values
    are milliseconds (presumably; nothing in this module enforces a unit).
    """

    # Overall cold start duration.
    total_ms: float = 0.0

    # Time attributed to spawning the provider process.
    process_spawn_ms: float = 0.0

    # Time attributed to establishing the connection.
    connection_ms: float = 0.0

    # Time attributed to tool discovery.
    tool_discovery_ms: float = 0.0

    # Time attributed to the first health check.
    first_health_check_ms: float = 0.0
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ObservabilityMetrics:
    """Extended metrics for observability dashboards and alerts.

    Process-wide singleton: every ``ObservabilityMetrics()`` call returns the
    same object. The instance is created lazily on first use, with creation
    guarded by a class-level lock, and provides additional metrics beyond
    the base metrics module.
    """

    _instance: Optional["ObservabilityMetrics"] = None
    _lock = threading.Lock()

    # Numeric gauge value published for each ``CircuitState.value``.
    _STATE_VALUES = {"closed": 0, "open": 1, "half_open": 2}

    def __new__(cls) -> "ObservabilityMetrics":
        """Return the shared instance, building it on first use.

        The new object is fully initialised *before* it is published on
        ``cls._instance``. Callers that take the unlocked fast path can
        therefore never observe an instance whose metric attributes are
        missing, and a failure inside ``_initialize`` does not leave a
        broken object cached as the singleton.
        """
        instance = cls._instance
        if instance is None:
            with cls._lock:
                # Re-check under the lock: another thread may have finished
                # construction while this one was waiting.
                instance = cls._instance
                if instance is None:
                    instance = super().__new__(cls)
                    instance._initialize()
                    cls._instance = instance
        return instance

    def _initialize(self) -> None:
        """Create every metric object and register it with the registry."""
        # Circuit breaker metrics
        self.circuit_breaker_state = Gauge(
            name="mcp_registry_circuit_breaker_state",
            description="Circuit breaker state (0=closed, 1=open, 2=half_open)",
            labels=["provider"],
        )

        self.circuit_breaker_failures = Counter(
            name="mcp_registry_circuit_breaker_failures_total",
            description="Total circuit breaker failures",
            labels=["provider"],
        )

        self.circuit_breaker_successes = Counter(
            name="mcp_registry_circuit_breaker_successes_total",
            description="Total circuit breaker successes after recovery",
            labels=["provider"],
        )

        # Retry metrics
        self.retry_attempts = Counter(
            name="mcp_registry_retry_attempts_total",
            description="Total retry attempts",
            labels=["provider", "tool", "attempt_number"],
        )

        self.retry_exhausted = Counter(
            name="mcp_registry_retry_exhausted_total",
            description="Total times all retries were exhausted",
            labels=["provider", "tool"],
        )

        self.retry_succeeded = Counter(
            name="mcp_registry_retry_succeeded_total",
            description="Total times retry succeeded after failure",
            labels=["provider", "tool", "attempt_number"],
        )

        # Queue metrics
        self.pending_requests = Gauge(
            name="mcp_registry_pending_requests",
            description="Number of pending requests per provider",
            labels=["provider"],
        )

        self.request_queue_time_seconds = Histogram(
            name="mcp_registry_request_queue_time_seconds",
            description="Time requests spend waiting in queue",
            labels=["provider"],
            buckets=(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5),
        )

        # Cold start detailed metrics
        self.cold_start_phase_duration = Histogram(
            name="mcp_registry_cold_start_phase_duration_seconds",
            description="Duration of cold start phases",
            labels=["provider", "phase"],  # phase: spawn, connect, discover, health
            buckets=(0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0),
        )

        self.cold_starts_in_progress = Gauge(
            name="mcp_registry_cold_starts_in_progress",
            description="Number of cold starts currently in progress",
            labels=["provider"],
        )

        # Resource metrics (best-effort)
        self.provider_memory_bytes = Gauge(
            name="mcp_registry_provider_memory_bytes",
            description="Memory usage of provider process in bytes",
            labels=["provider"],
        )

        self.provider_cpu_percent = Gauge(
            name="mcp_registry_provider_cpu_percent",
            description="CPU usage percentage of provider process",
            labels=["provider"],
        )

        # SLI metrics
        self.availability_ratio = Gauge(
            name="mcp_registry_availability_ratio",
            description="Availability ratio (ready providers / total providers)",
        )

        self.error_budget_remaining = Gauge(
            name="mcp_registry_error_budget_remaining",
            description="Remaining error budget ratio (1.0 = full budget)",
        )

        # Saturation metrics
        self.provider_utilization = Gauge(
            name="mcp_registry_provider_utilization",
            description="Provider utilization ratio (active/capacity)",
            labels=["provider"],
        )

        # Register all with global registry
        self._register_metrics()

        logger.debug("observability_metrics_initialized")

    def _register_metrics(self) -> None:
        """Register every metric created by ``_initialize`` with ``REGISTRY``."""
        metrics = (
            self.circuit_breaker_state,
            self.circuit_breaker_failures,
            self.circuit_breaker_successes,
            self.retry_attempts,
            self.retry_exhausted,
            self.retry_succeeded,
            self.pending_requests,
            self.request_queue_time_seconds,
            self.cold_start_phase_duration,
            self.cold_starts_in_progress,
            self.provider_memory_bytes,
            self.provider_cpu_percent,
            self.availability_ratio,
            self.error_budget_remaining,
            self.provider_utilization,
        )

        for metric in metrics:
            try:
                REGISTRY.register(metric)
            except ValueError:
                # Treated as "already registered" and deliberately ignored,
                # e.g. when the singleton is rebuilt after
                # reset_observability_metrics().
                # NOTE(review): the registry then presumably keeps the earlier
                # metric object rather than this one - confirm this is intended.
                pass

    # Circuit breaker methods
    def set_circuit_state(self, provider: str, state: "CircuitState") -> None:
        """Publish the circuit breaker state of ``provider`` as a gauge value.

        Args:
            provider: Provider ID.
            state: Current state; mapped to 0 (closed), 1 (open) or
                2 (half_open). A state whose value is not one of those
                three is reported as 0.
        """
        state_value = self._STATE_VALUES.get(state.value, 0)
        self.circuit_breaker_state.set(state_value, provider=provider)

    def record_circuit_failure(self, provider: str) -> None:
        """Increment the circuit breaker failure counter for ``provider``."""
        self.circuit_breaker_failures.inc(provider=provider)

    def record_circuit_success(self, provider: str) -> None:
        """Increment the circuit breaker success (recovery) counter for ``provider``."""
        self.circuit_breaker_successes.inc(provider=provider)

    # Retry methods
    def record_retry_attempt(self, provider: str, tool: str, attempt: int) -> None:
        """Count one retry attempt.

        Args:
            provider: Provider ID.
            tool: Tool name.
            attempt: Attempt number; stringified for the ``attempt_number`` label.
        """
        self.retry_attempts.inc(provider=provider, tool=tool, attempt_number=str(attempt))

    def record_retry_exhausted(self, provider: str, tool: str) -> None:
        """Count one occurrence of all retries being exhausted."""
        self.retry_exhausted.inc(provider=provider, tool=tool)

    def record_retry_success(self, provider: str, tool: str, attempt: int) -> None:
        """Count one retry that succeeded.

        Args:
            provider: Provider ID.
            tool: Tool name.
            attempt: Attempt number; stringified for the ``attempt_number`` label.
        """
        self.retry_succeeded.inc(provider=provider, tool=tool, attempt_number=str(attempt))

    # Queue methods
    def set_pending_requests(self, provider: str, count: int) -> None:
        """Set the pending request gauge for ``provider`` to ``count``."""
        self.pending_requests.set(count, provider=provider)

    def observe_queue_time(self, provider: str, duration_seconds: float) -> None:
        """Record the time a request spent in the queue, in seconds."""
        self.request_queue_time_seconds.observe(duration_seconds, provider=provider)

    # Cold start methods
    def record_cold_start_phase(self, provider: str, phase: str, duration_seconds: float) -> None:
        """Record duration of a cold start phase.

        Args:
            provider: Provider ID.
            phase: Phase name (spawn, connect, discover, health).
            duration_seconds: Phase duration.
        """
        self.cold_start_phase_duration.observe(duration_seconds, provider=provider, phase=phase)

    def cold_start_began(self, provider: str) -> None:
        """Increment the in-progress cold start gauge for ``provider``."""
        self.cold_starts_in_progress.inc(provider=provider)

    def cold_start_completed(self, provider: str) -> None:
        """Decrement the in-progress cold start gauge for ``provider``."""
        self.cold_starts_in_progress.dec(provider=provider)

    # Resource methods
    def update_provider_resources(
        self,
        provider: str,
        memory_bytes: Optional[int] = None,
        cpu_percent: Optional[float] = None,
    ) -> None:
        """Update provider resource metrics.

        A gauge is only touched when its value is supplied; ``None`` leaves
        the previously published value as it was.

        Args:
            provider: Provider ID.
            memory_bytes: Memory usage in bytes.
            cpu_percent: CPU usage percentage (0-100).
        """
        if memory_bytes is not None:
            self.provider_memory_bytes.set(memory_bytes, provider=provider)
        if cpu_percent is not None:
            self.provider_cpu_percent.set(cpu_percent, provider=provider)

    # SLI methods
    def update_availability(self, ready_count: int, total_count: int) -> None:
        """Update availability ratio.

        Args:
            ready_count: Number of ready providers.
            total_count: Total number of providers.
        """
        if total_count > 0:
            ratio = ready_count / total_count
        else:
            ratio = 1.0  # No providers = 100% available (vacuous truth)
        self.availability_ratio.set(ratio)

    def update_error_budget(self, remaining_ratio: float) -> None:
        """Update error budget remaining.

        Args:
            remaining_ratio: Ratio of error budget remaining (0.0 - 1.0).
                Values outside that range are clamped into it.
        """
        self.error_budget_remaining.set(max(0.0, min(1.0, remaining_ratio)))

    def update_utilization(self, provider: str, ratio: float) -> None:
        """Update provider utilization.

        Args:
            provider: Provider ID.
            ratio: Utilization ratio (0.0 - 1.0). Stored as given, without
                clamping.
        """
        self.provider_utilization.set(ratio, provider=provider)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
# Singleton accessor
|
|
300
|
+
# Module-level cache of the instance handed out by get_observability_metrics();
# cleared again by reset_observability_metrics().
_metrics_instance: Optional[ObservabilityMetrics] = None
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def get_observability_metrics() -> ObservabilityMetrics:
    """Return the shared ``ObservabilityMetrics`` object.

    The instance is created on the first call and cached in the
    module-level ``_metrics_instance`` for subsequent calls.

    Returns:
        ObservabilityMetrics instance.
    """
    global _metrics_instance
    cached = _metrics_instance
    if cached is not None:
        return cached
    # First call (or first call after a reset): build and remember it.
    cached = ObservabilityMetrics()
    _metrics_instance = cached
    return cached
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def reset_observability_metrics() -> None:
    """Drop the cached metrics singleton (for testing).

    Clears both the class-level instance and the module-level cache, so the
    next ``get_observability_metrics()`` call builds a fresh object.
    NOTE(review): nothing here unregisters the old metrics from the global
    registry - confirm whether tests rely on that.
    """
    global _metrics_instance
    ObservabilityMetrics._instance = None
    _metrics_instance = None
|