mcp-hangar 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_hangar/__init__.py +139 -0
- mcp_hangar/application/__init__.py +1 -0
- mcp_hangar/application/commands/__init__.py +67 -0
- mcp_hangar/application/commands/auth_commands.py +118 -0
- mcp_hangar/application/commands/auth_handlers.py +296 -0
- mcp_hangar/application/commands/commands.py +59 -0
- mcp_hangar/application/commands/handlers.py +189 -0
- mcp_hangar/application/discovery/__init__.py +21 -0
- mcp_hangar/application/discovery/discovery_metrics.py +283 -0
- mcp_hangar/application/discovery/discovery_orchestrator.py +497 -0
- mcp_hangar/application/discovery/lifecycle_manager.py +315 -0
- mcp_hangar/application/discovery/security_validator.py +414 -0
- mcp_hangar/application/event_handlers/__init__.py +50 -0
- mcp_hangar/application/event_handlers/alert_handler.py +191 -0
- mcp_hangar/application/event_handlers/audit_handler.py +203 -0
- mcp_hangar/application/event_handlers/knowledge_base_handler.py +120 -0
- mcp_hangar/application/event_handlers/logging_handler.py +69 -0
- mcp_hangar/application/event_handlers/metrics_handler.py +152 -0
- mcp_hangar/application/event_handlers/persistent_audit_store.py +217 -0
- mcp_hangar/application/event_handlers/security_handler.py +604 -0
- mcp_hangar/application/mcp/tooling.py +158 -0
- mcp_hangar/application/ports/__init__.py +9 -0
- mcp_hangar/application/ports/observability.py +237 -0
- mcp_hangar/application/queries/__init__.py +52 -0
- mcp_hangar/application/queries/auth_handlers.py +237 -0
- mcp_hangar/application/queries/auth_queries.py +118 -0
- mcp_hangar/application/queries/handlers.py +227 -0
- mcp_hangar/application/read_models/__init__.py +11 -0
- mcp_hangar/application/read_models/provider_views.py +139 -0
- mcp_hangar/application/sagas/__init__.py +11 -0
- mcp_hangar/application/sagas/group_rebalance_saga.py +137 -0
- mcp_hangar/application/sagas/provider_failover_saga.py +266 -0
- mcp_hangar/application/sagas/provider_recovery_saga.py +172 -0
- mcp_hangar/application/services/__init__.py +9 -0
- mcp_hangar/application/services/provider_service.py +208 -0
- mcp_hangar/application/services/traced_provider_service.py +211 -0
- mcp_hangar/bootstrap/runtime.py +328 -0
- mcp_hangar/context.py +178 -0
- mcp_hangar/domain/__init__.py +117 -0
- mcp_hangar/domain/contracts/__init__.py +57 -0
- mcp_hangar/domain/contracts/authentication.py +225 -0
- mcp_hangar/domain/contracts/authorization.py +229 -0
- mcp_hangar/domain/contracts/event_store.py +178 -0
- mcp_hangar/domain/contracts/metrics_publisher.py +59 -0
- mcp_hangar/domain/contracts/persistence.py +383 -0
- mcp_hangar/domain/contracts/provider_runtime.py +146 -0
- mcp_hangar/domain/discovery/__init__.py +20 -0
- mcp_hangar/domain/discovery/conflict_resolver.py +267 -0
- mcp_hangar/domain/discovery/discovered_provider.py +185 -0
- mcp_hangar/domain/discovery/discovery_service.py +412 -0
- mcp_hangar/domain/discovery/discovery_source.py +192 -0
- mcp_hangar/domain/events.py +433 -0
- mcp_hangar/domain/exceptions.py +525 -0
- mcp_hangar/domain/model/__init__.py +70 -0
- mcp_hangar/domain/model/aggregate.py +58 -0
- mcp_hangar/domain/model/circuit_breaker.py +152 -0
- mcp_hangar/domain/model/event_sourced_api_key.py +413 -0
- mcp_hangar/domain/model/event_sourced_provider.py +423 -0
- mcp_hangar/domain/model/event_sourced_role_assignment.py +268 -0
- mcp_hangar/domain/model/health_tracker.py +183 -0
- mcp_hangar/domain/model/load_balancer.py +185 -0
- mcp_hangar/domain/model/provider.py +810 -0
- mcp_hangar/domain/model/provider_group.py +656 -0
- mcp_hangar/domain/model/tool_catalog.py +105 -0
- mcp_hangar/domain/policies/__init__.py +19 -0
- mcp_hangar/domain/policies/provider_health.py +187 -0
- mcp_hangar/domain/repository.py +249 -0
- mcp_hangar/domain/security/__init__.py +85 -0
- mcp_hangar/domain/security/input_validator.py +710 -0
- mcp_hangar/domain/security/rate_limiter.py +387 -0
- mcp_hangar/domain/security/roles.py +237 -0
- mcp_hangar/domain/security/sanitizer.py +387 -0
- mcp_hangar/domain/security/secrets.py +501 -0
- mcp_hangar/domain/services/__init__.py +20 -0
- mcp_hangar/domain/services/audit_service.py +376 -0
- mcp_hangar/domain/services/image_builder.py +328 -0
- mcp_hangar/domain/services/provider_launcher.py +1046 -0
- mcp_hangar/domain/value_objects.py +1138 -0
- mcp_hangar/errors.py +818 -0
- mcp_hangar/fastmcp_server.py +1105 -0
- mcp_hangar/gc.py +134 -0
- mcp_hangar/infrastructure/__init__.py +79 -0
- mcp_hangar/infrastructure/async_executor.py +133 -0
- mcp_hangar/infrastructure/auth/__init__.py +37 -0
- mcp_hangar/infrastructure/auth/api_key_authenticator.py +388 -0
- mcp_hangar/infrastructure/auth/event_sourced_store.py +567 -0
- mcp_hangar/infrastructure/auth/jwt_authenticator.py +360 -0
- mcp_hangar/infrastructure/auth/middleware.py +340 -0
- mcp_hangar/infrastructure/auth/opa_authorizer.py +243 -0
- mcp_hangar/infrastructure/auth/postgres_store.py +659 -0
- mcp_hangar/infrastructure/auth/projections.py +366 -0
- mcp_hangar/infrastructure/auth/rate_limiter.py +311 -0
- mcp_hangar/infrastructure/auth/rbac_authorizer.py +323 -0
- mcp_hangar/infrastructure/auth/sqlite_store.py +624 -0
- mcp_hangar/infrastructure/command_bus.py +112 -0
- mcp_hangar/infrastructure/discovery/__init__.py +110 -0
- mcp_hangar/infrastructure/discovery/docker_source.py +289 -0
- mcp_hangar/infrastructure/discovery/entrypoint_source.py +249 -0
- mcp_hangar/infrastructure/discovery/filesystem_source.py +383 -0
- mcp_hangar/infrastructure/discovery/kubernetes_source.py +247 -0
- mcp_hangar/infrastructure/event_bus.py +260 -0
- mcp_hangar/infrastructure/event_sourced_repository.py +443 -0
- mcp_hangar/infrastructure/event_store.py +396 -0
- mcp_hangar/infrastructure/knowledge_base/__init__.py +259 -0
- mcp_hangar/infrastructure/knowledge_base/contracts.py +202 -0
- mcp_hangar/infrastructure/knowledge_base/memory.py +177 -0
- mcp_hangar/infrastructure/knowledge_base/postgres.py +545 -0
- mcp_hangar/infrastructure/knowledge_base/sqlite.py +513 -0
- mcp_hangar/infrastructure/metrics_publisher.py +36 -0
- mcp_hangar/infrastructure/observability/__init__.py +10 -0
- mcp_hangar/infrastructure/observability/langfuse_adapter.py +534 -0
- mcp_hangar/infrastructure/persistence/__init__.py +33 -0
- mcp_hangar/infrastructure/persistence/audit_repository.py +371 -0
- mcp_hangar/infrastructure/persistence/config_repository.py +398 -0
- mcp_hangar/infrastructure/persistence/database.py +333 -0
- mcp_hangar/infrastructure/persistence/database_common.py +330 -0
- mcp_hangar/infrastructure/persistence/event_serializer.py +280 -0
- mcp_hangar/infrastructure/persistence/event_upcaster.py +166 -0
- mcp_hangar/infrastructure/persistence/in_memory_event_store.py +150 -0
- mcp_hangar/infrastructure/persistence/recovery_service.py +312 -0
- mcp_hangar/infrastructure/persistence/sqlite_event_store.py +386 -0
- mcp_hangar/infrastructure/persistence/unit_of_work.py +409 -0
- mcp_hangar/infrastructure/persistence/upcasters/README.md +13 -0
- mcp_hangar/infrastructure/persistence/upcasters/__init__.py +7 -0
- mcp_hangar/infrastructure/query_bus.py +153 -0
- mcp_hangar/infrastructure/saga_manager.py +401 -0
- mcp_hangar/logging_config.py +209 -0
- mcp_hangar/metrics.py +1007 -0
- mcp_hangar/models.py +31 -0
- mcp_hangar/observability/__init__.py +54 -0
- mcp_hangar/observability/health.py +487 -0
- mcp_hangar/observability/metrics.py +319 -0
- mcp_hangar/observability/tracing.py +433 -0
- mcp_hangar/progress.py +542 -0
- mcp_hangar/retry.py +613 -0
- mcp_hangar/server/__init__.py +120 -0
- mcp_hangar/server/__main__.py +6 -0
- mcp_hangar/server/auth_bootstrap.py +340 -0
- mcp_hangar/server/auth_cli.py +335 -0
- mcp_hangar/server/auth_config.py +305 -0
- mcp_hangar/server/bootstrap.py +735 -0
- mcp_hangar/server/cli.py +161 -0
- mcp_hangar/server/config.py +224 -0
- mcp_hangar/server/context.py +215 -0
- mcp_hangar/server/http_auth_middleware.py +165 -0
- mcp_hangar/server/lifecycle.py +467 -0
- mcp_hangar/server/state.py +117 -0
- mcp_hangar/server/tools/__init__.py +16 -0
- mcp_hangar/server/tools/discovery.py +186 -0
- mcp_hangar/server/tools/groups.py +75 -0
- mcp_hangar/server/tools/health.py +301 -0
- mcp_hangar/server/tools/provider.py +939 -0
- mcp_hangar/server/tools/registry.py +320 -0
- mcp_hangar/server/validation.py +113 -0
- mcp_hangar/stdio_client.py +229 -0
- mcp_hangar-0.2.0.dist-info/METADATA +347 -0
- mcp_hangar-0.2.0.dist-info/RECORD +160 -0
- mcp_hangar-0.2.0.dist-info/WHEEL +4 -0
- mcp_hangar-0.2.0.dist-info/entry_points.txt +2 -0
- mcp_hangar-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""Command handlers implementation."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from typing import Any, Dict, List
|
|
5
|
+
|
|
6
|
+
from ...domain.contracts.provider_runtime import ProviderRuntime
|
|
7
|
+
from ...domain.exceptions import ProviderNotFoundError
|
|
8
|
+
from ...domain.repository import IProviderRepository
|
|
9
|
+
from ...infrastructure.command_bus import CommandBus, CommandHandler
|
|
10
|
+
from ...infrastructure.event_bus import EventBus
|
|
11
|
+
from ...logging_config import get_logger
|
|
12
|
+
from ...metrics import observe_tool_call, record_error, record_provider_start, record_provider_stop
|
|
13
|
+
from .commands import (
|
|
14
|
+
HealthCheckCommand,
|
|
15
|
+
InvokeToolCommand,
|
|
16
|
+
ShutdownIdleProvidersCommand,
|
|
17
|
+
StartProviderCommand,
|
|
18
|
+
StopProviderCommand,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
logger = get_logger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class BaseProviderHandler(CommandHandler):
    """Shared plumbing for command handlers that operate on providers.

    Stores the provider repository and the event bus, and offers helpers for
    looking a provider up and for publishing the events it has collected.
    """

    def __init__(self, repository: IProviderRepository, event_bus: EventBus):
        self._repository = repository
        self._event_bus = event_bus

    def _get_provider(self, provider_id: str) -> ProviderRuntime:
        """Look a provider up by ID.

        Raises:
            ProviderNotFoundError: If the repository returns ``None`` for the ID.
        """
        found = self._repository.get(provider_id)
        if found is not None:
            return found
        raise ProviderNotFoundError(provider_id)

    def _publish_events(self, provider: ProviderRuntime) -> None:
        """Send every event collected from ``provider`` to the event bus.

        A RuntimeError, ValueError or TypeError raised while publishing one
        event is logged and the remaining events are still published.
        """
        for pending in provider.collect_events():
            try:
                self._event_bus.publish(pending)
            except (RuntimeError, ValueError, TypeError) as exc:
                logger.error(
                    "event_publish_failed",
                    event_type=type(pending).__name__,
                    error=str(exc),
                    exc_info=True,
                )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class StartProviderHandler(BaseProviderHandler):
    """Handles ``StartProviderCommand``."""

    def handle(self, command: StartProviderCommand) -> Dict[str, Any]:
        """Ensure the requested provider is ready and report its state.

        A start metric is recorded for both outcomes, and events collected
        from the provider are published whether or not startup succeeded.

        Returns:
            Dict with the provider ID ("provider"), its state value ("state")
            and its tool names ("tools").

        Raises:
            ProviderNotFoundError: If no provider exists for the given ID.
            Exception: Anything raised while starting is re-raised after the
                failure metrics have been recorded.
        """
        target_id = command.provider_id
        provider = self._get_provider(target_id)

        try:
            provider.ensure_ready()
            record_provider_start(target_id, success=True)
        except Exception as exc:
            record_provider_start(target_id, success=False)
            record_error("provider", type(exc).__name__)
            raise
        finally:
            self._publish_events(provider)

        current_state = provider.state.value
        tool_names = provider.get_tool_names()
        return {"provider": target_id, "state": current_state, "tools": tool_names}
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class StopProviderHandler(BaseProviderHandler):
    """Handler for StopProviderCommand."""

    def handle(self, command: StopProviderCommand) -> Dict[str, Any]:
        """
        Stop a provider.

        The stop metric is recorded only after ``shutdown()`` returns; its
        reason label falls back to "manual" when the command carries none.
        Events collected from the provider are published even if shutdown
        raises, in line with StartProviderHandler and InvokeToolHandler.

        Returns:
            Confirmation dict with the provider ID ("stopped") and the
            command's reason as given ("reason").

        Raises:
            ProviderNotFoundError: If no provider exists for the given ID.
        """
        provider = self._get_provider(command.provider_id)
        try:
            provider.shutdown()
            record_provider_stop(command.provider_id, reason=command.reason or "manual")
        finally:
            # Publish in ``finally`` so events emitted before a failed
            # shutdown are not left unpublished.
            self._publish_events(provider)

        return {"stopped": command.provider_id, "reason": command.reason}
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class InvokeToolHandler(BaseProviderHandler):
    """Handles ``InvokeToolCommand``."""

    def handle(self, command: InvokeToolCommand) -> Dict[str, Any]:
        """Call a tool on a provider and record how the call went.

        The call duration, success flag and (on failure) the exception class
        name are passed to ``observe_tool_call``; afterwards the provider's
        collected events are published. Both happen on success and failure.

        Returns:
            Whatever the provider's ``invoke_tool`` returned.

        Raises:
            ProviderNotFoundError: If no provider exists for the given ID.
            Exception: Anything raised by the tool invocation is re-raised.
        """
        provider = self._get_provider(command.provider_id)

        failure_name = None
        succeeded = False
        started_at = time.perf_counter()

        try:
            outcome = provider.invoke_tool(command.tool_name, command.arguments, command.timeout)
            succeeded = True
            return outcome
        except Exception as exc:
            failure_name = type(exc).__name__
            raise
        finally:
            elapsed = time.perf_counter() - started_at
            observe_tool_call(
                provider=command.provider_id,
                tool=command.tool_name,
                duration=elapsed,
                success=succeeded,
                error_type=failure_name,
            )
            self._publish_events(provider)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class HealthCheckHandler(BaseProviderHandler):
    """Handler for HealthCheckCommand."""

    def handle(self, command: HealthCheckCommand) -> bool:
        """
        Perform health check on a provider.

        Events collected from the provider are published even if the health
        check raises, in line with StartProviderHandler and InvokeToolHandler.

        Returns:
            True if healthy, False otherwise

        Raises:
            ProviderNotFoundError: If no provider exists for the given ID.
        """
        provider = self._get_provider(command.provider_id)
        try:
            return provider.health_check()
        finally:
            # Runs before the value is handed back, and also when
            # health_check() raises, so no collected events are dropped.
            self._publish_events(provider)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class ShutdownIdleProvidersHandler(BaseProviderHandler):
    """Handles ``ShutdownIdleProvidersCommand``."""

    def handle(self, command: ShutdownIdleProvidersCommand) -> List[str]:
        """Ask every provider in the repository to shut down if it is idle.

        Events are published for each provider that reports it was shut down.

        Returns:
            IDs of the providers that were shut down, in repository order.
        """
        stopped: List[str] = []
        for pid, runtime in self._repository.get_all().items():
            if not runtime.maybe_shutdown_idle():
                continue
            stopped.append(pid)
            self._publish_events(runtime)

        return stopped
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def register_all_handlers(command_bus: CommandBus, repository: IProviderRepository, event_bus: EventBus) -> None:
    """Wire each provider command type to a freshly built handler.

    Args:
        command_bus: Bus on which the handlers are registered.
        repository: Provider repository handed to every handler.
        event_bus: Event bus handed to every handler.
    """
    # Order matches the sequence in which handlers are created and registered.
    registrations = (
        (StartProviderCommand, StartProviderHandler),
        (StopProviderCommand, StopProviderHandler),
        (InvokeToolCommand, InvokeToolHandler),
        (HealthCheckCommand, HealthCheckHandler),
        (ShutdownIdleProvidersCommand, ShutdownIdleProvidersHandler),
    )
    for command_type, handler_cls in registrations:
        command_bus.register(command_type, handler_cls(repository, event_bus))

    logger.info("command_handlers_registered")
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Discovery application module.
|
|
2
|
+
|
|
3
|
+
This module contains application layer components for provider discovery,
|
|
4
|
+
including the orchestrator, security validation, and metrics.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .discovery_metrics import DiscoveryMetrics
|
|
8
|
+
from .discovery_orchestrator import DiscoveryConfig, DiscoveryOrchestrator
|
|
9
|
+
from .lifecycle_manager import DiscoveryLifecycleManager
|
|
10
|
+
from .security_validator import SecurityConfig, SecurityValidator, ValidationReport, ValidationResult
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"DiscoveryOrchestrator",
|
|
14
|
+
"DiscoveryConfig",
|
|
15
|
+
"SecurityValidator",
|
|
16
|
+
"SecurityConfig",
|
|
17
|
+
"ValidationResult",
|
|
18
|
+
"ValidationReport",
|
|
19
|
+
"DiscoveryMetrics",
|
|
20
|
+
"DiscoveryLifecycleManager",
|
|
21
|
+
]
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
"""Discovery Metrics.
|
|
2
|
+
|
|
3
|
+
Prometheus metrics for provider discovery observability.
|
|
4
|
+
Tracks discovery cycles, registrations, conflicts, and validation times.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from functools import wraps
|
|
8
|
+
import time
|
|
9
|
+
from typing import Callable
|
|
10
|
+
|
|
11
|
+
from ...logging_config import get_logger
|
|
12
|
+
|
|
13
|
+
logger = get_logger(__name__)
|
|
14
|
+
|
|
15
|
+
# Optional prometheus dependency
|
|
16
|
+
try:
|
|
17
|
+
from prometheus_client import Counter, Gauge, Histogram
|
|
18
|
+
|
|
19
|
+
PROMETHEUS_AVAILABLE = True
|
|
20
|
+
except ImportError:
|
|
21
|
+
PROMETHEUS_AVAILABLE = False
|
|
22
|
+
# Note: No logging here - module is imported before setup_logging() is called
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DiscoveryMetrics:
    """Prometheus instrumentation for provider discovery.

    When ``prometheus_client`` could not be imported the instance is created
    disabled: no collectors exist and every recording method does nothing.

    Metrics (shown with the default prefix):
        - mcp_hangar_discovery_providers_total: Gauge, labels source/status
        - mcp_hangar_discovery_registrations_total: Counter, label source
        - mcp_hangar_discovery_deregistrations_total: Counter, labels source/reason
        - mcp_hangar_discovery_errors_total: Counter, labels source/error_type
        - mcp_hangar_discovery_conflicts_total: Counter, label type
        - mcp_hangar_discovery_quarantine_total: Counter, label reason
        - mcp_hangar_discovery_validation_failures_total: Counter, labels source/validation_type
        - mcp_hangar_discovery_latency_seconds: Histogram, label source
        - mcp_hangar_discovery_validation_duration_seconds: Histogram, label source
        - mcp_hangar_discovery_cycle_duration_seconds: Histogram, no labels
    """

    def __init__(self, prefix: str = "mcp_hangar_discovery"):
        """Create the collectors, or log a warning if Prometheus is missing.

        Args:
            prefix: String prepended (with an underscore) to every metric name.
        """
        self.prefix = prefix
        self._enabled = PROMETHEUS_AVAILABLE

        if self._enabled:
            self._build_collectors(prefix)
        else:
            logger.warning("Prometheus metrics disabled (prometheus_client not installed)")

    def _build_collectors(self, prefix: str) -> None:
        """Instantiate the gauge, counters and histograms under ``prefix``."""
        # Gauge
        self.providers_total = Gauge(
            f"{prefix}_providers_total",
            "Number of discovered providers",
            ["source", "status"],
        )

        # Counters
        self.registrations_total = Counter(
            f"{prefix}_registrations_total",
            "Total provider registrations from discovery",
            ["source"],
        )
        self.deregistrations_total = Counter(
            f"{prefix}_deregistrations_total",
            "Total provider deregistrations",
            ["source", "reason"],
        )
        self.errors_total = Counter(
            f"{prefix}_errors_total",
            "Total discovery errors",
            ["source", "error_type"],
        )
        self.conflicts_total = Counter(
            f"{prefix}_conflicts_total",
            "Total discovery conflicts",
            ["type"],
        )
        self.quarantine_total = Counter(
            f"{prefix}_quarantine_total",
            "Total quarantined providers",
            ["reason"],
        )
        self.validation_failures_total = Counter(
            f"{prefix}_validation_failures_total",
            "Total validation failures",
            ["source", "validation_type"],
        )

        # Histograms
        self.latency_seconds = Histogram(
            f"{prefix}_latency_seconds",
            "Discovery cycle duration",
            ["source"],
            buckets=[0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0],
        )
        self.validation_duration_seconds = Histogram(
            f"{prefix}_validation_duration_seconds",
            "Provider validation duration",
            ["source"],
            buckets=[0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0],
        )
        self.cycle_duration_seconds = Histogram(
            f"{prefix}_cycle_duration_seconds",
            "Full discovery cycle duration",
            buckets=[0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0],
        )

    def set_providers_count(self, source: str, status: str, count: int) -> None:
        """Set the provider gauge for one source/status pair.

        Args:
            source: Discovery source type.
            status: Provider status label (e.g. "discovered").
            count: Value to store in the gauge.
        """
        if not self._enabled:
            return
        self.providers_total.labels(source=source, status=status).set(count)

    def inc_registrations(self, source: str) -> None:
        """Add one to the registration counter.

        Args:
            source: Discovery source type.
        """
        if not self._enabled:
            return
        self.registrations_total.labels(source=source).inc()

    def inc_deregistrations(self, source: str, reason: str) -> None:
        """Add one to the deregistration counter.

        Args:
            source: Discovery source type.
            reason: Why the provider was deregistered.
        """
        if not self._enabled:
            return
        self.deregistrations_total.labels(source=source, reason=reason).inc()

    def inc_errors(self, source: str, error_type: str) -> None:
        """Add one to the error counter.

        Args:
            source: Discovery source type.
            error_type: Label describing the kind of error.
        """
        if not self._enabled:
            return
        self.errors_total.labels(source=source, error_type=error_type).inc()

    def inc_conflicts(self, conflict_type: str) -> None:
        """Add one to the conflict counter.

        Args:
            conflict_type: Value for the counter's ``type`` label.
        """
        if not self._enabled:
            return
        self.conflicts_total.labels(type=conflict_type).inc()

    def inc_quarantine(self, reason: str) -> None:
        """Add one to the quarantine counter.

        Args:
            reason: Why the provider was quarantined.
        """
        if not self._enabled:
            return
        self.quarantine_total.labels(reason=reason).inc()

    def inc_validation_failures(self, source: str, validation_type: str) -> None:
        """Add one to the validation-failure counter.

        Args:
            source: Discovery source type.
            validation_type: Which validation failed.
        """
        if not self._enabled:
            return
        self.validation_failures_total.labels(source=source, validation_type=validation_type).inc()

    def observe_latency(self, source: str, duration_seconds: float) -> None:
        """Record one observation in the discovery latency histogram.

        Args:
            source: Discovery source type.
            duration_seconds: Elapsed time in seconds.
        """
        if not self._enabled:
            return
        self.latency_seconds.labels(source=source).observe(duration_seconds)

    def observe_validation_duration(self, source: str, duration_seconds: float) -> None:
        """Record one observation in the validation duration histogram.

        Args:
            source: Discovery source type.
            duration_seconds: Elapsed time in seconds.
        """
        if not self._enabled:
            return
        self.validation_duration_seconds.labels(source=source).observe(duration_seconds)

    def observe_cycle_duration(self, duration_seconds: float) -> None:
        """Record one observation in the full-cycle duration histogram.

        Args:
            duration_seconds: Elapsed time in seconds.
        """
        if not self._enabled:
            return
        self.cycle_duration_seconds.observe(duration_seconds)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
# Global metrics instance; stays None until get_discovery_metrics() is first called.
# The annotation is a string so the "X | None" form is never evaluated at runtime.
_metrics: "DiscoveryMetrics | None" = None


def get_discovery_metrics() -> DiscoveryMetrics:
    """Get or create global discovery metrics instance.

    The instance is built with the default prefix on the first call and the
    same object is returned on every later call.

    Returns:
        DiscoveryMetrics instance
    """
    global _metrics
    if _metrics is None:
        _metrics = DiscoveryMetrics()
    return _metrics
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def observe_discovery(source_type: str):
    """Build a decorator that instruments an async discovery call.

    The wrapped coroutine's duration is always recorded as discovery latency.
    If it returns a list, the list length is stored as the "discovered"
    provider count; if an exception is raised, the error counter is
    incremented with the exception class name and the exception is re-raised.

    Args:
        source_type: Discovery source type used as the ``source`` label.

    Returns:
        Decorator for coroutine functions.
    """

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        async def wrapper(*args, **kwargs):
            recorder = get_discovery_metrics()
            began = time.perf_counter()

            try:
                found = await func(*args, **kwargs)

                # Only list results carry a meaningful provider count.
                if isinstance(found, list):
                    recorder.set_providers_count(source=source_type, status="discovered", count=len(found))

                return found
            except Exception as exc:
                recorder.inc_errors(source=source_type, error_type=type(exc).__name__)
                raise
            finally:
                elapsed = time.perf_counter() - began
                recorder.observe_latency(source=source_type, duration_seconds=elapsed)

        return wrapper

    return decorator
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def observe_validation(source_type: str):
    """Build a decorator that times an async validation call.

    The wrapped coroutine's duration is recorded in the validation duration
    histogram whether it returns or raises.

    Args:
        source_type: Discovery source type used as the ``source`` label.

    Returns:
        Decorator for coroutine functions.
    """

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        async def wrapper(*args, **kwargs):
            recorder = get_discovery_metrics()
            began = time.perf_counter()

            try:
                return await func(*args, **kwargs)
            finally:
                elapsed = time.perf_counter() - began
                recorder.observe_validation_duration(source=source_type, duration_seconds=elapsed)

        return wrapper

    return decorator
|