mcp-hangar 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_hangar/__init__.py +139 -0
- mcp_hangar/application/__init__.py +1 -0
- mcp_hangar/application/commands/__init__.py +67 -0
- mcp_hangar/application/commands/auth_commands.py +118 -0
- mcp_hangar/application/commands/auth_handlers.py +296 -0
- mcp_hangar/application/commands/commands.py +59 -0
- mcp_hangar/application/commands/handlers.py +189 -0
- mcp_hangar/application/discovery/__init__.py +21 -0
- mcp_hangar/application/discovery/discovery_metrics.py +283 -0
- mcp_hangar/application/discovery/discovery_orchestrator.py +497 -0
- mcp_hangar/application/discovery/lifecycle_manager.py +315 -0
- mcp_hangar/application/discovery/security_validator.py +414 -0
- mcp_hangar/application/event_handlers/__init__.py +50 -0
- mcp_hangar/application/event_handlers/alert_handler.py +191 -0
- mcp_hangar/application/event_handlers/audit_handler.py +203 -0
- mcp_hangar/application/event_handlers/knowledge_base_handler.py +120 -0
- mcp_hangar/application/event_handlers/logging_handler.py +69 -0
- mcp_hangar/application/event_handlers/metrics_handler.py +152 -0
- mcp_hangar/application/event_handlers/persistent_audit_store.py +217 -0
- mcp_hangar/application/event_handlers/security_handler.py +604 -0
- mcp_hangar/application/mcp/tooling.py +158 -0
- mcp_hangar/application/ports/__init__.py +9 -0
- mcp_hangar/application/ports/observability.py +237 -0
- mcp_hangar/application/queries/__init__.py +52 -0
- mcp_hangar/application/queries/auth_handlers.py +237 -0
- mcp_hangar/application/queries/auth_queries.py +118 -0
- mcp_hangar/application/queries/handlers.py +227 -0
- mcp_hangar/application/read_models/__init__.py +11 -0
- mcp_hangar/application/read_models/provider_views.py +139 -0
- mcp_hangar/application/sagas/__init__.py +11 -0
- mcp_hangar/application/sagas/group_rebalance_saga.py +137 -0
- mcp_hangar/application/sagas/provider_failover_saga.py +266 -0
- mcp_hangar/application/sagas/provider_recovery_saga.py +172 -0
- mcp_hangar/application/services/__init__.py +9 -0
- mcp_hangar/application/services/provider_service.py +208 -0
- mcp_hangar/application/services/traced_provider_service.py +211 -0
- mcp_hangar/bootstrap/runtime.py +328 -0
- mcp_hangar/context.py +178 -0
- mcp_hangar/domain/__init__.py +117 -0
- mcp_hangar/domain/contracts/__init__.py +57 -0
- mcp_hangar/domain/contracts/authentication.py +225 -0
- mcp_hangar/domain/contracts/authorization.py +229 -0
- mcp_hangar/domain/contracts/event_store.py +178 -0
- mcp_hangar/domain/contracts/metrics_publisher.py +59 -0
- mcp_hangar/domain/contracts/persistence.py +383 -0
- mcp_hangar/domain/contracts/provider_runtime.py +146 -0
- mcp_hangar/domain/discovery/__init__.py +20 -0
- mcp_hangar/domain/discovery/conflict_resolver.py +267 -0
- mcp_hangar/domain/discovery/discovered_provider.py +185 -0
- mcp_hangar/domain/discovery/discovery_service.py +412 -0
- mcp_hangar/domain/discovery/discovery_source.py +192 -0
- mcp_hangar/domain/events.py +433 -0
- mcp_hangar/domain/exceptions.py +525 -0
- mcp_hangar/domain/model/__init__.py +70 -0
- mcp_hangar/domain/model/aggregate.py +58 -0
- mcp_hangar/domain/model/circuit_breaker.py +152 -0
- mcp_hangar/domain/model/event_sourced_api_key.py +413 -0
- mcp_hangar/domain/model/event_sourced_provider.py +423 -0
- mcp_hangar/domain/model/event_sourced_role_assignment.py +268 -0
- mcp_hangar/domain/model/health_tracker.py +183 -0
- mcp_hangar/domain/model/load_balancer.py +185 -0
- mcp_hangar/domain/model/provider.py +810 -0
- mcp_hangar/domain/model/provider_group.py +656 -0
- mcp_hangar/domain/model/tool_catalog.py +105 -0
- mcp_hangar/domain/policies/__init__.py +19 -0
- mcp_hangar/domain/policies/provider_health.py +187 -0
- mcp_hangar/domain/repository.py +249 -0
- mcp_hangar/domain/security/__init__.py +85 -0
- mcp_hangar/domain/security/input_validator.py +710 -0
- mcp_hangar/domain/security/rate_limiter.py +387 -0
- mcp_hangar/domain/security/roles.py +237 -0
- mcp_hangar/domain/security/sanitizer.py +387 -0
- mcp_hangar/domain/security/secrets.py +501 -0
- mcp_hangar/domain/services/__init__.py +20 -0
- mcp_hangar/domain/services/audit_service.py +376 -0
- mcp_hangar/domain/services/image_builder.py +328 -0
- mcp_hangar/domain/services/provider_launcher.py +1046 -0
- mcp_hangar/domain/value_objects.py +1138 -0
- mcp_hangar/errors.py +818 -0
- mcp_hangar/fastmcp_server.py +1105 -0
- mcp_hangar/gc.py +134 -0
- mcp_hangar/infrastructure/__init__.py +79 -0
- mcp_hangar/infrastructure/async_executor.py +133 -0
- mcp_hangar/infrastructure/auth/__init__.py +37 -0
- mcp_hangar/infrastructure/auth/api_key_authenticator.py +388 -0
- mcp_hangar/infrastructure/auth/event_sourced_store.py +567 -0
- mcp_hangar/infrastructure/auth/jwt_authenticator.py +360 -0
- mcp_hangar/infrastructure/auth/middleware.py +340 -0
- mcp_hangar/infrastructure/auth/opa_authorizer.py +243 -0
- mcp_hangar/infrastructure/auth/postgres_store.py +659 -0
- mcp_hangar/infrastructure/auth/projections.py +366 -0
- mcp_hangar/infrastructure/auth/rate_limiter.py +311 -0
- mcp_hangar/infrastructure/auth/rbac_authorizer.py +323 -0
- mcp_hangar/infrastructure/auth/sqlite_store.py +624 -0
- mcp_hangar/infrastructure/command_bus.py +112 -0
- mcp_hangar/infrastructure/discovery/__init__.py +110 -0
- mcp_hangar/infrastructure/discovery/docker_source.py +289 -0
- mcp_hangar/infrastructure/discovery/entrypoint_source.py +249 -0
- mcp_hangar/infrastructure/discovery/filesystem_source.py +383 -0
- mcp_hangar/infrastructure/discovery/kubernetes_source.py +247 -0
- mcp_hangar/infrastructure/event_bus.py +260 -0
- mcp_hangar/infrastructure/event_sourced_repository.py +443 -0
- mcp_hangar/infrastructure/event_store.py +396 -0
- mcp_hangar/infrastructure/knowledge_base/__init__.py +259 -0
- mcp_hangar/infrastructure/knowledge_base/contracts.py +202 -0
- mcp_hangar/infrastructure/knowledge_base/memory.py +177 -0
- mcp_hangar/infrastructure/knowledge_base/postgres.py +545 -0
- mcp_hangar/infrastructure/knowledge_base/sqlite.py +513 -0
- mcp_hangar/infrastructure/metrics_publisher.py +36 -0
- mcp_hangar/infrastructure/observability/__init__.py +10 -0
- mcp_hangar/infrastructure/observability/langfuse_adapter.py +534 -0
- mcp_hangar/infrastructure/persistence/__init__.py +33 -0
- mcp_hangar/infrastructure/persistence/audit_repository.py +371 -0
- mcp_hangar/infrastructure/persistence/config_repository.py +398 -0
- mcp_hangar/infrastructure/persistence/database.py +333 -0
- mcp_hangar/infrastructure/persistence/database_common.py +330 -0
- mcp_hangar/infrastructure/persistence/event_serializer.py +280 -0
- mcp_hangar/infrastructure/persistence/event_upcaster.py +166 -0
- mcp_hangar/infrastructure/persistence/in_memory_event_store.py +150 -0
- mcp_hangar/infrastructure/persistence/recovery_service.py +312 -0
- mcp_hangar/infrastructure/persistence/sqlite_event_store.py +386 -0
- mcp_hangar/infrastructure/persistence/unit_of_work.py +409 -0
- mcp_hangar/infrastructure/persistence/upcasters/README.md +13 -0
- mcp_hangar/infrastructure/persistence/upcasters/__init__.py +7 -0
- mcp_hangar/infrastructure/query_bus.py +153 -0
- mcp_hangar/infrastructure/saga_manager.py +401 -0
- mcp_hangar/logging_config.py +209 -0
- mcp_hangar/metrics.py +1007 -0
- mcp_hangar/models.py +31 -0
- mcp_hangar/observability/__init__.py +54 -0
- mcp_hangar/observability/health.py +487 -0
- mcp_hangar/observability/metrics.py +319 -0
- mcp_hangar/observability/tracing.py +433 -0
- mcp_hangar/progress.py +542 -0
- mcp_hangar/retry.py +613 -0
- mcp_hangar/server/__init__.py +120 -0
- mcp_hangar/server/__main__.py +6 -0
- mcp_hangar/server/auth_bootstrap.py +340 -0
- mcp_hangar/server/auth_cli.py +335 -0
- mcp_hangar/server/auth_config.py +305 -0
- mcp_hangar/server/bootstrap.py +735 -0
- mcp_hangar/server/cli.py +161 -0
- mcp_hangar/server/config.py +224 -0
- mcp_hangar/server/context.py +215 -0
- mcp_hangar/server/http_auth_middleware.py +165 -0
- mcp_hangar/server/lifecycle.py +467 -0
- mcp_hangar/server/state.py +117 -0
- mcp_hangar/server/tools/__init__.py +16 -0
- mcp_hangar/server/tools/discovery.py +186 -0
- mcp_hangar/server/tools/groups.py +75 -0
- mcp_hangar/server/tools/health.py +301 -0
- mcp_hangar/server/tools/provider.py +939 -0
- mcp_hangar/server/tools/registry.py +320 -0
- mcp_hangar/server/validation.py +113 -0
- mcp_hangar/stdio_client.py +229 -0
- mcp_hangar-0.2.0.dist-info/METADATA +347 -0
- mcp_hangar-0.2.0.dist-info/RECORD +160 -0
- mcp_hangar-0.2.0.dist-info/WHEEL +4 -0
- mcp_hangar-0.2.0.dist-info/entry_points.txt +2 -0
- mcp_hangar-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
"""Security Validator for Discovery.
|
|
2
|
+
|
|
3
|
+
Validates discovered providers before registration.
|
|
4
|
+
Implements a multi-stage validation pipeline with security controls.
|
|
5
|
+
|
|
6
|
+
Validation Pipeline:
|
|
7
|
+
1. Source Validation - Is the source trusted?
|
|
8
|
+
2. Rate Limit Check - Is this source flooding?
|
|
9
|
+
3. Health Check - Does the provider respond?
|
|
10
|
+
4. Schema Validation - Does it implement MCP correctly?
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from enum import Enum
|
|
16
|
+
import time
|
|
17
|
+
from typing import Any, Dict, List, Optional, Set
|
|
18
|
+
|
|
19
|
+
from mcp_hangar.domain.discovery.discovered_provider import DiscoveredProvider
|
|
20
|
+
|
|
21
|
+
from ...logging_config import get_logger
|
|
22
|
+
|
|
23
|
+
logger = get_logger(__name__)
|
|
24
|
+
|
|
25
|
+
# Optional aiohttp dependency
|
|
26
|
+
try:
|
|
27
|
+
import aiohttp
|
|
28
|
+
|
|
29
|
+
AIOHTTP_AVAILABLE = True
|
|
30
|
+
except ImportError:
|
|
31
|
+
AIOHTTP_AVAILABLE = False
|
|
32
|
+
# Note: No logging here - module is imported before setup_logging() is called
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ValidationResult(Enum):
    """Outcome of running a provider through the validation pipeline.

    Each member's value is the lowercase string used when the result is
    rendered or serialized.
    """

    PASSED = "passed"
    FAILED_SOURCE = "failed_source"
    FAILED_HEALTH = "failed_health"
    FAILED_SCHEMA = "failed_schema"
    FAILED_RATE_LIMIT = "failed_rate_limit"
    SKIPPED = "skipped"

    def __str__(self) -> str:
        """Render the member as its raw string value."""
        text: str = self.value
        return text

    @property
    def is_passed(self) -> bool:
        """Return True for PASSED and SKIPPED, False for every failure."""
        if self is ValidationResult.PASSED:
            return True
        return self is ValidationResult.SKIPPED
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
class ValidationReport:
    """Outcome of one validation run for a single provider.

    Attributes:
        result: Which pipeline outcome was reached
        provider: The provider the report is about
        reason: Human-readable explanation of the outcome
        details: Optional extra context (URLs, errors, etc.)
        duration_ms: How long validation took, in milliseconds
    """

    result: ValidationResult
    provider: DiscoveredProvider
    reason: str
    details: Optional[Dict[str, Any]] = None
    duration_ms: float = 0.0

    @property
    def is_passed(self) -> bool:
        """Delegate to the result's own ``is_passed`` flag."""
        outcome = self.result
        return outcome.is_passed

    def to_dict(self) -> Dict[str, Any]:
        """Build a plain-dict view of the report for serialization.

        The provider is represented only by its ``name``; ``details`` is
        included as-is (not copied).
        """
        serialized: Dict[str, Any] = {}
        serialized["result"] = self.result.value
        serialized["provider_name"] = self.provider.name
        serialized["reason"] = self.reason
        serialized["details"] = self.details
        serialized["duration_ms"] = self.duration_ms
        return serialized
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass
class SecurityConfig:
    """Security configuration for validation.

    Attributes:
        allowed_namespaces: Whitelist of K8s namespaces (empty = no whitelist)
        denied_namespaces: Blacklist of K8s namespaces
        require_health_check: Whether to require health check pass
        require_mcp_schema: Whether to validate MCP schema
        max_providers_per_source: Max providers from single source
        max_registration_rate: Max registrations per minute per source
        health_check_timeout_s: Health check timeout in seconds
        quarantine_on_failure: Whether to quarantine failed providers
    """

    allowed_namespaces: Set[str] = field(default_factory=set)
    denied_namespaces: Set[str] = field(default_factory=lambda: {"kube-system", "default"})
    require_health_check: bool = True
    require_mcp_schema: bool = False
    max_providers_per_source: int = 100
    max_registration_rate: int = 10  # per minute
    health_check_timeout_s: float = 5.0
    quarantine_on_failure: bool = True

    @staticmethod
    def _as_namespace_set(value: Any) -> Set[str]:
        """Coerce a configured namespace value into a set of names.

        A bare string is treated as ONE namespace; passing it straight to
        ``set()`` would split it into individual characters.
        """
        if isinstance(value, str):
            return {value}
        return set(value)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SecurityConfig":
        """Create from dictionary.

        Keys that are missing or explicitly ``None`` (e.g. a YAML ``null``)
        fall back to the dataclass field defaults, so the defaults live in
        exactly one place. An explicit empty list for a namespace key is
        honoured and yields an empty set.

        Args:
            data: Mapping of configuration keys to values; ``None`` or an
                empty mapping yields an all-defaults configuration.

        Returns:
            A populated SecurityConfig.
        """
        data = data or {}
        kwargs: Dict[str, Any] = {}

        for key in ("allowed_namespaces", "denied_namespaces"):
            value = data.get(key)
            if value is not None:
                kwargs[key] = cls._as_namespace_set(value)

        scalar_keys = (
            "require_health_check",
            "require_mcp_schema",
            "max_providers_per_source",
            "max_registration_rate",
            "health_check_timeout_s",
            "quarantine_on_failure",
        )
        for key in scalar_keys:
            value = data.get(key)
            # None means "unset": keep the default instead of storing a value
            # that would silently disable a check or break a comparison later.
            if value is not None:
                kwargs[key] = value

        return cls(**kwargs)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class SecurityValidator:
    """Validates discovered providers before registration.

    Runs the following checks in order and stops at the first failure:
        1. Source Validation - Kubernetes namespace whitelist/blacklist
        2. Rate Limit Check - Prevent registration floods per source
        3. Provider Count Check - Cap registered providers per source
        4. Health Check - Verify provider is responsive
           (only when ``config.require_health_check``)
        5. Schema Validation - Verify MCP compliance
           (only when ``config.require_mcp_schema``; currently deferred)

    Usage:
        validator = SecurityValidator(config)
        report = await validator.validate(provider)
        if report.is_passed:
            # Register provider
        else:
            # Quarantine or reject
    """

    def __init__(self, config: Optional[SecurityConfig] = None):
        """Initialize security validator.

        Args:
            config: Security configuration; a default SecurityConfig is used
                when omitted.
        """
        self.config = config or SecurityConfig()

        # Rate limiting state: source -> monotonic timestamps of recent attempts
        self._registration_counts: Dict[str, List[float]] = {}

        # Provider counts per source
        self._provider_counts: Dict[str, int] = {}

    async def validate(self, provider: DiscoveredProvider) -> ValidationReport:
        """Run full validation pipeline.

        Args:
            provider: Provider to validate

        Returns:
            ValidationReport with result and details; ``duration_ms`` covers
            the time from entry until the deciding check returned.
        """
        start_time = time.perf_counter()

        def finish(report: ValidationReport) -> ValidationReport:
            # Stamp the elapsed time on whichever report ends the pipeline.
            report.duration_ms = (time.perf_counter() - start_time) * 1000
            return report

        # Steps 1-3: synchronous checks (source, rate limit, provider count).
        for check in (
            self._validate_source,
            self._check_rate_limit,
            self._check_provider_count,
        ):
            failure = check(provider)
            if failure is not None:
                return finish(failure)

        # Step 4: Health check (for HTTP providers)
        if self.config.require_health_check:
            failure = await self._validate_health(provider)
            if failure is not None:
                return finish(failure)

        # Step 5: MCP schema validation
        if self.config.require_mcp_schema:
            failure = await self._validate_schema(provider)
            if failure is not None:
                return finish(failure)

        # All checks passed
        return finish(
            ValidationReport(
                result=ValidationResult.PASSED,
                provider=provider,
                reason="All validation checks passed",
            )
        )

    def _validate_source(self, provider: DiscoveredProvider) -> Optional[ValidationReport]:
        """Validate source is trusted.

        Only providers whose ``source_type`` is ``"kubernetes"`` are checked;
        every other source type passes this step.

        Args:
            provider: Provider to validate

        Returns:
            ValidationReport if failed, None if passed
        """
        if provider.source_type != "kubernetes":
            return None

        namespace = provider.metadata.get("namespace", "")

        # The denied list wins over the allowed list.
        if namespace in self.config.denied_namespaces:
            return ValidationReport(
                result=ValidationResult.FAILED_SOURCE,
                provider=provider,
                reason=f"Namespace '{namespace}' is in denied list",
                details={
                    "namespace": namespace,
                    "denied_namespaces": list(self.config.denied_namespaces),
                },
            )

        # An empty allowed list means "no whitelist configured".
        if self.config.allowed_namespaces and namespace not in self.config.allowed_namespaces:
            return ValidationReport(
                result=ValidationResult.FAILED_SOURCE,
                provider=provider,
                reason=f"Namespace '{namespace}' is not in allowed list",
                details={
                    "namespace": namespace,
                    "allowed_namespaces": list(self.config.allowed_namespaces),
                },
            )

        return None

    def _check_rate_limit(self, provider: DiscoveredProvider) -> Optional[ValidationReport]:
        """Check registration rate limit.

        Keeps a sliding one-minute window of attempt timestamps per source
        type. An attempt that is within the limit is recorded immediately,
        before any later pipeline step runs.

        Args:
            provider: Provider to validate

        Returns:
            ValidationReport if rate exceeded, None if within limit
        """
        source = provider.source_type
        # Monotonic clock: the window must not stall or flush when the
        # wall clock is adjusted.
        now = time.monotonic()
        window = 60.0  # 1 minute window

        # Drop attempts that have aged out of the window.
        recent = [t for t in self._registration_counts.get(source, []) if now - t < window]
        self._registration_counts[source] = recent

        if len(recent) >= self.config.max_registration_rate:
            return ValidationReport(
                result=ValidationResult.FAILED_RATE_LIMIT,
                provider=provider,
                reason=f"Rate limit exceeded for source '{source}'",
                details={
                    "source": source,
                    "current_rate": len(recent),
                    "max_rate": self.config.max_registration_rate,
                    "window_seconds": window,
                },
            )

        # Record this registration attempt
        recent.append(now)
        return None

    def _check_provider_count(self, provider: DiscoveredProvider) -> Optional[ValidationReport]:
        """Check provider count per source.

        The count is maintained by ``record_registration`` and
        ``record_deregistration``; this method only reads it.

        Args:
            provider: Provider to validate

        Returns:
            ValidationReport if count exceeded, None if within limit
        """
        source = provider.source_type
        current_count = self._provider_counts.get(source, 0)

        if current_count >= self.config.max_providers_per_source:
            # Reported as FAILED_RATE_LIMIT: the enum has no dedicated
            # "too many providers" member.
            return ValidationReport(
                result=ValidationResult.FAILED_RATE_LIMIT,
                provider=provider,
                reason=f"Max providers exceeded for source '{source}'",
                details={
                    "source": source,
                    "current_count": current_count,
                    "max_count": self.config.max_providers_per_source,
                },
            )

        return None

    @staticmethod
    def _build_health_url(host: Any, port: Any, health_path: Any) -> str:
        """Assemble the plain-HTTP health check URL.

        IPv6 literals are wrapped in brackets and the path is forced to start
        with ``/`` so the result is always a well-formed URL.
        """
        host_text = str(host)
        if ":" in host_text and not host_text.startswith("["):
            host_text = f"[{host_text}]"

        path_text = str(health_path)
        if not path_text.startswith("/"):
            path_text = f"/{path_text}"

        return f"http://{host_text}:{port}{path_text}"

    async def _validate_health(self, provider: DiscoveredProvider) -> Optional[ValidationReport]:
        """Validate provider health endpoint.

        Issues a single GET against ``http://<host>:<port><health_path>`` and
        requires a 200 response. Providers whose mode is not ``http``, ``sse``
        or ``remote`` are not checked.

        Args:
            provider: Provider to validate

        Returns:
            ValidationReport if health check failed, None if passed
        """
        # Only check HTTP-based providers
        if provider.mode not in ("http", "sse", "remote"):
            return None

        # NOTE: without aiohttp the check is skipped and counts as passed,
        # even when require_health_check is enabled.
        if not AIOHTTP_AVAILABLE:
            logger.debug(f"Skipping health check for {provider.name} (aiohttp not available)")
            return None

        host = provider.connection_info.get("host")
        port = provider.connection_info.get("port")
        # "or" also covers an explicit None/empty health_path.
        health_path = provider.connection_info.get("health_path") or "/health"

        if not host or not port:
            return ValidationReport(
                result=ValidationResult.FAILED_HEALTH,
                provider=provider,
                reason="Missing host or port in connection_info",
                details={"connection_info": provider.connection_info},
            )

        url = self._build_health_url(host, port, health_path)

        try:
            timeout = aiohttp.ClientTimeout(total=self.config.health_check_timeout_s)

            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(url) as response:
                    if response.status != 200:
                        return ValidationReport(
                            result=ValidationResult.FAILED_HEALTH,
                            provider=provider,
                            reason=f"Health check returned status {response.status}",
                            details={"url": url, "status": response.status},
                        )

        except asyncio.TimeoutError:
            return ValidationReport(
                result=ValidationResult.FAILED_HEALTH,
                provider=provider,
                reason="Health check timed out",
                details={"url": url, "timeout": self.config.health_check_timeout_s},
            )
        except Exception as e:
            # Deliberately broad: any failure to reach the endpoint must be
            # reported as a failed health check, never raised to the caller.
            return ValidationReport(
                result=ValidationResult.FAILED_HEALTH,
                provider=provider,
                reason=f"Health check failed: {e}",
                details={"url": url, "error": str(e)},
            )

        return None

    async def _validate_schema(self, provider: DiscoveredProvider) -> Optional[ValidationReport]:
        """Validate MCP tools schema.

        Currently a placeholder that always passes.

        Args:
            provider: Provider to validate

        Returns:
            ValidationReport if schema invalid, None if valid
        """
        # NOTE: MCP schema validation is intentionally deferred.
        # The provider's tools/list response should be validated against MCP spec,
        # but this requires network calls during registration which adds latency.
        # Schema validation can be done lazily on first tool invocation instead.
        logger.debug(f"Schema validation deferred for {provider.name}")
        return None

    def record_registration(self, provider: DiscoveredProvider) -> None:
        """Record successful registration for counting.

        Args:
            provider: Registered provider
        """
        source = provider.source_type
        self._provider_counts[source] = self._provider_counts.get(source, 0) + 1

    def record_deregistration(self, provider: DiscoveredProvider) -> None:
        """Record deregistration for counting.

        The count never drops below zero; unknown sources are ignored.

        Args:
            provider: Deregistered provider
        """
        source = provider.source_type
        if source in self._provider_counts:
            self._provider_counts[source] = max(0, self._provider_counts[source] - 1)

    def reset_rate_limits(self) -> None:
        """Reset all rate limit counters."""
        self._registration_counts.clear()

    def reset_provider_counts(self) -> None:
        """Reset all provider counts."""
        self._provider_counts.clear()
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Event handlers for reacting to domain events."""
|
|
2
|
+
|
|
3
|
+
from .alert_handler import Alert, AlertEventHandler, AlertSink, CallbackAlertSink, LogAlertSink
|
|
4
|
+
from .audit_handler import AuditEventHandler, AuditRecord, AuditStore, InMemoryAuditStore, LogAuditStore
|
|
5
|
+
from .logging_handler import LoggingEventHandler
|
|
6
|
+
from .metrics_handler import MetricsEventHandler
|
|
7
|
+
from .security_handler import (
|
|
8
|
+
CallbackSecuritySink,
|
|
9
|
+
CompositeSecuritySink,
|
|
10
|
+
get_security_handler,
|
|
11
|
+
InMemorySecuritySink,
|
|
12
|
+
LogSecuritySink,
|
|
13
|
+
reset_security_handler,
|
|
14
|
+
SecurityEvent,
|
|
15
|
+
SecurityEventHandler,
|
|
16
|
+
SecurityEventSink,
|
|
17
|
+
SecurityEventType,
|
|
18
|
+
SecuritySeverity,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Public API of the event_handlers package: every name listed here is
# imported above from one of the handler submodules, grouped by concern.
__all__ = [
    # Logging
    "LoggingEventHandler",
    # Metrics
    "MetricsEventHandler",
    # Alerts
    "AlertEventHandler",
    "Alert",
    "AlertSink",
    "LogAlertSink",
    "CallbackAlertSink",
    # Audit
    "AuditEventHandler",
    "AuditRecord",
    "AuditStore",
    "InMemoryAuditStore",
    "LogAuditStore",
    # Security
    "SecurityEventHandler",
    "SecurityEvent",
    "SecurityEventType",
    "SecuritySeverity",
    "SecurityEventSink",
    "LogSecuritySink",
    "InMemorySecuritySink",
    "CallbackSecuritySink",
    "CompositeSecuritySink",
    "get_security_handler",
    "reset_security_handler",
]
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""Alert event handler for critical notifications."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
from ...domain.events import DomainEvent, HealthCheckFailed, ProviderDegraded, ProviderStopped, ToolInvocationFailed
|
|
9
|
+
from ...logging_config import get_logger
|
|
10
|
+
|
|
11
|
+
logger = get_logger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class Alert:
    """A single alert notification.

    ``timestamp`` defaults to the current UTC time at construction and
    ``details`` defaults to a fresh empty dict per instance.
    """

    level: str  # critical, warning, info
    message: str
    provider_id: str
    event_type: str
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the alert as a plain dict, with the timestamp in ISO 8601 form."""
        payload: Dict[str, Any] = {}
        payload["level"] = self.level
        payload["message"] = self.message
        payload["provider_id"] = self.provider_id
        payload["event_type"] = self.event_type
        payload["timestamp"] = self.timestamp.isoformat()
        payload["details"] = self.details
        return payload
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class AlertSink(ABC):
    """Base interface every alert destination must implement."""

    @abstractmethod
    def send(self, alert: Alert) -> None:
        """Deliver ``alert`` to this destination.

        Concrete sinks must override this; the base implementation does
        nothing.
        """
        return None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class LogAlertSink(AlertSink):
    """Alert sink that writes each alert to the module logger."""

    def send(self, alert: Alert) -> None:
        """Log the alert at a severity matching its level.

        ``critical`` and ``warning`` map to the logger methods of the same
        name; any other level is logged at info.
        """
        severity = alert.level
        if severity == "critical":
            emit = logger.critical
        elif severity == "warning":
            emit = logger.warning
        else:
            emit = logger.info

        line = f"ALERT [{alert.level.upper()}] {alert.message} provider={alert.provider_id} event={alert.event_type}"
        emit(line)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class CallbackAlertSink(AlertSink):
    """Alert sink that forwards each alert to a user-supplied callable."""

    def __init__(self, callback: Callable[[Alert], None]):
        """Store the callable that will receive every alert."""
        self._callback = callback

    def send(self, alert: Alert) -> None:
        """Invoke the stored callback with ``alert``; exceptions propagate."""
        forward = self._callback
        forward(alert)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class AlertEventHandler:
    """
    Event handler that generates alerts for critical events.

    Monitors domain events and generates alerts when:
    - Provider is degraded
    - Provider stops unexpectedly
    - Tool invocation fails
    - Health check fails repeatedly

    Every alert is sent to all configured sinks and also kept in a bounded
    in-memory history exposed through ``alerts_sent``.
    """

    def __init__(
        self,
        sinks: Optional[List[AlertSink]] = None,
        degradation_threshold: int = 3,
        health_failure_threshold: int = 3,
        max_alert_history: Optional[int] = 1000,
    ):
        """
        Initialize the alert handler.

        Args:
            sinks: List of alert sinks to send alerts to. When omitted (or
                empty) a single LogAlertSink is used.
            degradation_threshold: Consecutive failures at or above which a
                degraded-provider alert is critical instead of warning
            health_failure_threshold: Consecutive health failures at or above
                which a warning alert is generated
            max_alert_history: Maximum number of alerts retained for
                ``alerts_sent``; the oldest are dropped first. ``None`` keeps
                every alert (unbounded).
        """
        self._sinks = sinks or [LogAlertSink()]
        self._degradation_threshold = degradation_threshold
        self._health_failure_threshold = health_failure_threshold
        self._max_alert_history = max_alert_history
        self._alerts_sent: List[Alert] = []

    def handle(self, event: DomainEvent) -> None:
        """Handle a domain event and potentially generate alerts.

        Events of any type other than the four handled below are ignored.
        """
        if isinstance(event, ProviderDegraded):
            self._handle_degraded(event)
        elif isinstance(event, ProviderStopped):
            self._handle_stopped(event)
        elif isinstance(event, ToolInvocationFailed):
            self._handle_tool_failed(event)
        elif isinstance(event, HealthCheckFailed):
            self._handle_health_failed(event)

    def _handle_degraded(self, event: ProviderDegraded) -> None:
        """Handle provider degraded event (always alerts; level depends on threshold)."""
        level = "critical" if event.consecutive_failures >= self._degradation_threshold else "warning"

        alert = Alert(
            level=level,
            message=f"Provider degraded after {event.consecutive_failures} failures",
            provider_id=event.provider_id,
            event_type="ProviderDegraded",
            details={
                "consecutive_failures": event.consecutive_failures,
                "total_failures": event.total_failures,
                "reason": event.reason,
            },
        )
        self._send_alert(alert)

    def _handle_stopped(self, event: ProviderStopped) -> None:
        """Handle provider stopped event."""
        # Only alert for unexpected stops (not shutdown or idle)
        if event.reason in ("shutdown", "idle"):
            return

        alert = Alert(
            level="warning",
            message=f"Provider stopped unexpectedly: {event.reason}",
            provider_id=event.provider_id,
            event_type="ProviderStopped",
            details={"reason": event.reason},
        )
        self._send_alert(alert)

    def _handle_tool_failed(self, event: ToolInvocationFailed) -> None:
        """Handle tool invocation failed event (always a warning alert)."""
        alert = Alert(
            level="warning",
            message=f"Tool invocation failed: {event.tool_name}",
            provider_id=event.provider_id,
            event_type="ToolInvocationFailed",
            details={
                "tool_name": event.tool_name,
                "error_message": event.error_message,
                "error_type": event.error_type,
                "correlation_id": event.correlation_id,
            },
        )
        self._send_alert(alert)

    def _handle_health_failed(self, event: HealthCheckFailed) -> None:
        """Handle health check failed event; silent below the failure threshold."""
        if event.consecutive_failures < self._health_failure_threshold:
            return

        alert = Alert(
            level="warning",
            message=f"Health check failed {event.consecutive_failures} times",
            provider_id=event.provider_id,
            event_type="HealthCheckFailed",
            details={
                "consecutive_failures": event.consecutive_failures,
                "error_message": event.error_message,
            },
        )
        self._send_alert(alert)

    def _send_alert(self, alert: Alert) -> None:
        """Record the alert and send it to all sinks.

        A failing sink is logged and does not prevent delivery to the
        remaining sinks.
        """
        self._alerts_sent.append(alert)

        # Trim the history so a long-running process cannot grow it forever.
        if self._max_alert_history is not None:
            overflow = len(self._alerts_sent) - max(self._max_alert_history, 0)
            if overflow > 0:
                del self._alerts_sent[:overflow]

        for sink in self._sinks:
            try:
                sink.send(alert)
            except Exception as e:
                logger.error(f"Failed to send alert to sink: {e}")

    @property
    def alerts_sent(self) -> List[Alert]:
        """Get a copy of the retained alert history (for testing)."""
        return list(self._alerts_sent)

    def clear_alerts(self) -> None:
        """Clear sent alerts (for testing)."""
        self._alerts_sent.clear()
|