mcp-hangar 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. mcp_hangar/__init__.py +139 -0
  2. mcp_hangar/application/__init__.py +1 -0
  3. mcp_hangar/application/commands/__init__.py +67 -0
  4. mcp_hangar/application/commands/auth_commands.py +118 -0
  5. mcp_hangar/application/commands/auth_handlers.py +296 -0
  6. mcp_hangar/application/commands/commands.py +59 -0
  7. mcp_hangar/application/commands/handlers.py +189 -0
  8. mcp_hangar/application/discovery/__init__.py +21 -0
  9. mcp_hangar/application/discovery/discovery_metrics.py +283 -0
  10. mcp_hangar/application/discovery/discovery_orchestrator.py +497 -0
  11. mcp_hangar/application/discovery/lifecycle_manager.py +315 -0
  12. mcp_hangar/application/discovery/security_validator.py +414 -0
  13. mcp_hangar/application/event_handlers/__init__.py +50 -0
  14. mcp_hangar/application/event_handlers/alert_handler.py +191 -0
  15. mcp_hangar/application/event_handlers/audit_handler.py +203 -0
  16. mcp_hangar/application/event_handlers/knowledge_base_handler.py +120 -0
  17. mcp_hangar/application/event_handlers/logging_handler.py +69 -0
  18. mcp_hangar/application/event_handlers/metrics_handler.py +152 -0
  19. mcp_hangar/application/event_handlers/persistent_audit_store.py +217 -0
  20. mcp_hangar/application/event_handlers/security_handler.py +604 -0
  21. mcp_hangar/application/mcp/tooling.py +158 -0
  22. mcp_hangar/application/ports/__init__.py +9 -0
  23. mcp_hangar/application/ports/observability.py +237 -0
  24. mcp_hangar/application/queries/__init__.py +52 -0
  25. mcp_hangar/application/queries/auth_handlers.py +237 -0
  26. mcp_hangar/application/queries/auth_queries.py +118 -0
  27. mcp_hangar/application/queries/handlers.py +227 -0
  28. mcp_hangar/application/read_models/__init__.py +11 -0
  29. mcp_hangar/application/read_models/provider_views.py +139 -0
  30. mcp_hangar/application/sagas/__init__.py +11 -0
  31. mcp_hangar/application/sagas/group_rebalance_saga.py +137 -0
  32. mcp_hangar/application/sagas/provider_failover_saga.py +266 -0
  33. mcp_hangar/application/sagas/provider_recovery_saga.py +172 -0
  34. mcp_hangar/application/services/__init__.py +9 -0
  35. mcp_hangar/application/services/provider_service.py +208 -0
  36. mcp_hangar/application/services/traced_provider_service.py +211 -0
  37. mcp_hangar/bootstrap/runtime.py +328 -0
  38. mcp_hangar/context.py +178 -0
  39. mcp_hangar/domain/__init__.py +117 -0
  40. mcp_hangar/domain/contracts/__init__.py +57 -0
  41. mcp_hangar/domain/contracts/authentication.py +225 -0
  42. mcp_hangar/domain/contracts/authorization.py +229 -0
  43. mcp_hangar/domain/contracts/event_store.py +178 -0
  44. mcp_hangar/domain/contracts/metrics_publisher.py +59 -0
  45. mcp_hangar/domain/contracts/persistence.py +383 -0
  46. mcp_hangar/domain/contracts/provider_runtime.py +146 -0
  47. mcp_hangar/domain/discovery/__init__.py +20 -0
  48. mcp_hangar/domain/discovery/conflict_resolver.py +267 -0
  49. mcp_hangar/domain/discovery/discovered_provider.py +185 -0
  50. mcp_hangar/domain/discovery/discovery_service.py +412 -0
  51. mcp_hangar/domain/discovery/discovery_source.py +192 -0
  52. mcp_hangar/domain/events.py +433 -0
  53. mcp_hangar/domain/exceptions.py +525 -0
  54. mcp_hangar/domain/model/__init__.py +70 -0
  55. mcp_hangar/domain/model/aggregate.py +58 -0
  56. mcp_hangar/domain/model/circuit_breaker.py +152 -0
  57. mcp_hangar/domain/model/event_sourced_api_key.py +413 -0
  58. mcp_hangar/domain/model/event_sourced_provider.py +423 -0
  59. mcp_hangar/domain/model/event_sourced_role_assignment.py +268 -0
  60. mcp_hangar/domain/model/health_tracker.py +183 -0
  61. mcp_hangar/domain/model/load_balancer.py +185 -0
  62. mcp_hangar/domain/model/provider.py +810 -0
  63. mcp_hangar/domain/model/provider_group.py +656 -0
  64. mcp_hangar/domain/model/tool_catalog.py +105 -0
  65. mcp_hangar/domain/policies/__init__.py +19 -0
  66. mcp_hangar/domain/policies/provider_health.py +187 -0
  67. mcp_hangar/domain/repository.py +249 -0
  68. mcp_hangar/domain/security/__init__.py +85 -0
  69. mcp_hangar/domain/security/input_validator.py +710 -0
  70. mcp_hangar/domain/security/rate_limiter.py +387 -0
  71. mcp_hangar/domain/security/roles.py +237 -0
  72. mcp_hangar/domain/security/sanitizer.py +387 -0
  73. mcp_hangar/domain/security/secrets.py +501 -0
  74. mcp_hangar/domain/services/__init__.py +20 -0
  75. mcp_hangar/domain/services/audit_service.py +376 -0
  76. mcp_hangar/domain/services/image_builder.py +328 -0
  77. mcp_hangar/domain/services/provider_launcher.py +1046 -0
  78. mcp_hangar/domain/value_objects.py +1138 -0
  79. mcp_hangar/errors.py +818 -0
  80. mcp_hangar/fastmcp_server.py +1105 -0
  81. mcp_hangar/gc.py +134 -0
  82. mcp_hangar/infrastructure/__init__.py +79 -0
  83. mcp_hangar/infrastructure/async_executor.py +133 -0
  84. mcp_hangar/infrastructure/auth/__init__.py +37 -0
  85. mcp_hangar/infrastructure/auth/api_key_authenticator.py +388 -0
  86. mcp_hangar/infrastructure/auth/event_sourced_store.py +567 -0
  87. mcp_hangar/infrastructure/auth/jwt_authenticator.py +360 -0
  88. mcp_hangar/infrastructure/auth/middleware.py +340 -0
  89. mcp_hangar/infrastructure/auth/opa_authorizer.py +243 -0
  90. mcp_hangar/infrastructure/auth/postgres_store.py +659 -0
  91. mcp_hangar/infrastructure/auth/projections.py +366 -0
  92. mcp_hangar/infrastructure/auth/rate_limiter.py +311 -0
  93. mcp_hangar/infrastructure/auth/rbac_authorizer.py +323 -0
  94. mcp_hangar/infrastructure/auth/sqlite_store.py +624 -0
  95. mcp_hangar/infrastructure/command_bus.py +112 -0
  96. mcp_hangar/infrastructure/discovery/__init__.py +110 -0
  97. mcp_hangar/infrastructure/discovery/docker_source.py +289 -0
  98. mcp_hangar/infrastructure/discovery/entrypoint_source.py +249 -0
  99. mcp_hangar/infrastructure/discovery/filesystem_source.py +383 -0
  100. mcp_hangar/infrastructure/discovery/kubernetes_source.py +247 -0
  101. mcp_hangar/infrastructure/event_bus.py +260 -0
  102. mcp_hangar/infrastructure/event_sourced_repository.py +443 -0
  103. mcp_hangar/infrastructure/event_store.py +396 -0
  104. mcp_hangar/infrastructure/knowledge_base/__init__.py +259 -0
  105. mcp_hangar/infrastructure/knowledge_base/contracts.py +202 -0
  106. mcp_hangar/infrastructure/knowledge_base/memory.py +177 -0
  107. mcp_hangar/infrastructure/knowledge_base/postgres.py +545 -0
  108. mcp_hangar/infrastructure/knowledge_base/sqlite.py +513 -0
  109. mcp_hangar/infrastructure/metrics_publisher.py +36 -0
  110. mcp_hangar/infrastructure/observability/__init__.py +10 -0
  111. mcp_hangar/infrastructure/observability/langfuse_adapter.py +534 -0
  112. mcp_hangar/infrastructure/persistence/__init__.py +33 -0
  113. mcp_hangar/infrastructure/persistence/audit_repository.py +371 -0
  114. mcp_hangar/infrastructure/persistence/config_repository.py +398 -0
  115. mcp_hangar/infrastructure/persistence/database.py +333 -0
  116. mcp_hangar/infrastructure/persistence/database_common.py +330 -0
  117. mcp_hangar/infrastructure/persistence/event_serializer.py +280 -0
  118. mcp_hangar/infrastructure/persistence/event_upcaster.py +166 -0
  119. mcp_hangar/infrastructure/persistence/in_memory_event_store.py +150 -0
  120. mcp_hangar/infrastructure/persistence/recovery_service.py +312 -0
  121. mcp_hangar/infrastructure/persistence/sqlite_event_store.py +386 -0
  122. mcp_hangar/infrastructure/persistence/unit_of_work.py +409 -0
  123. mcp_hangar/infrastructure/persistence/upcasters/README.md +13 -0
  124. mcp_hangar/infrastructure/persistence/upcasters/__init__.py +7 -0
  125. mcp_hangar/infrastructure/query_bus.py +153 -0
  126. mcp_hangar/infrastructure/saga_manager.py +401 -0
  127. mcp_hangar/logging_config.py +209 -0
  128. mcp_hangar/metrics.py +1007 -0
  129. mcp_hangar/models.py +31 -0
  130. mcp_hangar/observability/__init__.py +54 -0
  131. mcp_hangar/observability/health.py +487 -0
  132. mcp_hangar/observability/metrics.py +319 -0
  133. mcp_hangar/observability/tracing.py +433 -0
  134. mcp_hangar/progress.py +542 -0
  135. mcp_hangar/retry.py +613 -0
  136. mcp_hangar/server/__init__.py +120 -0
  137. mcp_hangar/server/__main__.py +6 -0
  138. mcp_hangar/server/auth_bootstrap.py +340 -0
  139. mcp_hangar/server/auth_cli.py +335 -0
  140. mcp_hangar/server/auth_config.py +305 -0
  141. mcp_hangar/server/bootstrap.py +735 -0
  142. mcp_hangar/server/cli.py +161 -0
  143. mcp_hangar/server/config.py +224 -0
  144. mcp_hangar/server/context.py +215 -0
  145. mcp_hangar/server/http_auth_middleware.py +165 -0
  146. mcp_hangar/server/lifecycle.py +467 -0
  147. mcp_hangar/server/state.py +117 -0
  148. mcp_hangar/server/tools/__init__.py +16 -0
  149. mcp_hangar/server/tools/discovery.py +186 -0
  150. mcp_hangar/server/tools/groups.py +75 -0
  151. mcp_hangar/server/tools/health.py +301 -0
  152. mcp_hangar/server/tools/provider.py +939 -0
  153. mcp_hangar/server/tools/registry.py +320 -0
  154. mcp_hangar/server/validation.py +113 -0
  155. mcp_hangar/stdio_client.py +229 -0
  156. mcp_hangar-0.2.0.dist-info/METADATA +347 -0
  157. mcp_hangar-0.2.0.dist-info/RECORD +160 -0
  158. mcp_hangar-0.2.0.dist-info/WHEEL +4 -0
  159. mcp_hangar-0.2.0.dist-info/entry_points.txt +2 -0
  160. mcp_hangar-0.2.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,414 @@
1
+ """Security Validator for Discovery.
2
+
3
+ Validates discovered providers before registration.
4
+ Implements a multi-stage validation pipeline with security controls.
5
+
6
+ Validation Pipeline:
7
+ 1. Source Validation - Is the source trusted?
8
+ 2. Rate Limit Check - Is this source flooding or over its provider cap?
9
+ 3. Health Check - Does the provider respond?
10
+ 4. Schema Validation - Does it implement MCP correctly?
11
+ """
12
+
13
+ import asyncio
14
+ from dataclasses import dataclass, field
15
+ from enum import Enum
16
+ import time
17
+ from typing import Any, Dict, List, Optional, Set
18
+
19
+ from mcp_hangar.domain.discovery.discovered_provider import DiscoveredProvider
20
+
21
+ from ...logging_config import get_logger
22
+
23
+ logger = get_logger(__name__)
24
+
25
+ # Optional aiohttp dependency
26
+ try:
27
+ import aiohttp
28
+
29
+ AIOHTTP_AVAILABLE = True
30
+ except ImportError:
31
+ AIOHTTP_AVAILABLE = False
32
+ # Note: No logging here - module is imported before setup_logging() is called
33
+
34
+
35
class ValidationResult(Enum):
    """Outcome reached by the validation pipeline.

    The value of each member is the string emitted when the outcome is
    serialized or printed.
    """

    PASSED = "passed"
    FAILED_SOURCE = "failed_source"
    FAILED_HEALTH = "failed_health"
    FAILED_SCHEMA = "failed_schema"
    FAILED_RATE_LIMIT = "failed_rate_limit"
    SKIPPED = "skipped"

    def __str__(self) -> str:
        """Render the member as its raw string value."""
        return self.value

    @property
    def is_passed(self) -> bool:
        """True for PASSED and SKIPPED, False for every FAILED_* member."""
        if self is ValidationResult.PASSED:
            return True
        return self is ValidationResult.SKIPPED
51
+
52
+
53
@dataclass
class ValidationReport:
    """Outcome of validating a single discovered provider.

    Attributes:
        result: Pipeline outcome that was reached
        provider: The provider this report is about
        reason: Human-readable explanation of the outcome
        details: Optional extra context (URLs, errors, etc.)
        duration_ms: Validation duration in milliseconds
    """

    result: ValidationResult
    provider: DiscoveredProvider
    reason: str
    details: Optional[Dict[str, Any]] = None
    duration_ms: float = 0.0

    @property
    def is_passed(self) -> bool:
        """Whether ``result`` counts as a pass (delegated to the enum)."""
        outcome = self.result
        return outcome.is_passed

    def to_dict(self) -> Dict[str, Any]:
        """Build a plain dictionary suitable for serialization.

        Only the provider's name is included, not the provider object.
        """
        serialized: Dict[str, Any] = {}
        serialized["result"] = self.result.value
        serialized["provider_name"] = self.provider.name
        serialized["reason"] = self.reason
        serialized["details"] = self.details
        serialized["duration_ms"] = self.duration_ms
        return serialized
84
+
85
+
86
@dataclass
class SecurityConfig:
    """Security configuration for validation.

    Attributes:
        allowed_namespaces: Whitelist of K8s namespaces
        denied_namespaces: Blacklist of K8s namespaces
        require_health_check: Whether to require health check pass
        require_mcp_schema: Whether to validate MCP schema
        max_providers_per_source: Max providers from single source
        max_registration_rate: Max registrations per minute per source
        health_check_timeout_s: Health check timeout in seconds
        quarantine_on_failure: Whether to quarantine failed providers
    """

    allowed_namespaces: Set[str] = field(default_factory=set)
    denied_namespaces: Set[str] = field(default_factory=lambda: {"kube-system", "default"})
    require_health_check: bool = True
    require_mcp_schema: bool = False
    max_providers_per_source: int = 100
    max_registration_rate: int = 10  # per minute
    health_check_timeout_s: float = 5.0
    quarantine_on_failure: bool = True

    @staticmethod
    def _to_namespace_set(value: Any, default: Set[str]) -> Set[str]:
        """Normalize a raw namespace setting into a set of names.

        Args:
            value: Raw value from the configuration mapping
            default: Set to fall back to when ``value`` is None

        Returns:
            A new set of namespace names
        """
        if value is None:
            # Key missing or explicitly null (e.g. an empty YAML value).
            return set(default)
        if isinstance(value, str):
            # set("prod") would split the name into single characters.
            return {value}
        return set(value)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SecurityConfig":
        """Create from dictionary.

        Missing keys and keys whose value is None both fall back to the
        defaults declared on the dataclass fields. An explicitly empty
        list for a namespace setting is preserved as an empty set.

        Args:
            data: Mapping of setting name to raw value

        Returns:
            A populated SecurityConfig
        """
        # Single source of truth for defaults: the field declarations.
        defaults = cls()

        def setting(name: str) -> Any:
            value = data.get(name)
            return getattr(defaults, name) if value is None else value

        return cls(
            allowed_namespaces=cls._to_namespace_set(
                data.get("allowed_namespaces"), defaults.allowed_namespaces
            ),
            denied_namespaces=cls._to_namespace_set(
                data.get("denied_namespaces"), defaults.denied_namespaces
            ),
            require_health_check=setting("require_health_check"),
            require_mcp_schema=setting("require_mcp_schema"),
            max_providers_per_source=setting("max_providers_per_source"),
            max_registration_rate=setting("max_registration_rate"),
            health_check_timeout_s=setting("health_check_timeout_s"),
            quarantine_on_failure=setting("quarantine_on_failure"),
        )
123
+
124
+
125
class SecurityValidator:
    """Validates discovered providers before registration.

    Runs the checks below in order and stops at the first one that fails:
        1. Source Validation - Namespace whitelist/blacklist
        2. Rate Limit Check - Prevent registration floods
        3. Provider Count Check - Cap on registered providers per source
        4. Health Check - Verify provider is responsive (if enabled)
        5. Schema Validation - Verify MCP compliance (if enabled)

    Usage:
        validator = SecurityValidator(config)
        report = await validator.validate(provider)
        if report.is_passed:
            # Register provider
        else:
            # Quarantine or reject
    """

    # Length of the sliding window used by the registration rate limit.
    _RATE_LIMIT_WINDOW_S = 60.0

    # Provider modes for which an HTTP health probe is attempted.
    _HTTP_MODES = ("http", "sse", "remote")

    def __init__(self, config: Optional[SecurityConfig] = None):
        """Initialize security validator.

        Args:
            config: Security configuration; defaults are used when omitted
        """
        self.config = config or SecurityConfig()

        # Rate limiting state: source -> monotonic timestamps of recent attempts
        self._registration_counts: Dict[str, List[float]] = {}

        # Provider counts per source
        self._provider_counts: Dict[str, int] = {}

    @staticmethod
    def _stamp(report: ValidationReport, start_time: float) -> ValidationReport:
        """Record the time elapsed since ``start_time`` on ``report`` and return it."""
        report.duration_ms = (time.perf_counter() - start_time) * 1000
        return report

    @staticmethod
    def _build_health_url(host: Any, port: Any, health_path: Optional[str]) -> str:
        """Assemble the health-check URL from connection details.

        Args:
            host: Host name or IP address
            port: TCP port
            health_path: Path of the health endpoint, or None for "/health"

        Returns:
            An ``http://`` URL pointing at the health endpoint
        """
        path = "/health" if health_path is None else str(health_path)
        if not path.startswith("/"):
            # Without the separator the path would be glued onto the port.
            path = f"/{path}"

        host_text = str(host)
        if ":" in host_text and not host_text.startswith("["):
            # IPv6 literals must be bracketed inside a URL authority.
            host_text = f"[{host_text}]"

        return f"http://{host_text}:{port}{path}"

    async def validate(self, provider: DiscoveredProvider) -> ValidationReport:
        """Run full validation pipeline.

        Args:
            provider: Provider to validate

        Returns:
            ValidationReport with result and details; ``duration_ms`` covers
            the time spent in this call
        """
        start_time = time.perf_counter()

        # Steps 1-3 are synchronous; `or` short-circuits at the first failure
        # so later checks (and their side effects) are skipped.
        failure = (
            self._validate_source(provider)
            or self._check_rate_limit(provider)
            or self._check_provider_count(provider)
        )

        # Step 4: Health check (for HTTP providers)
        if not failure and self.config.require_health_check:
            failure = await self._validate_health(provider)

        # Step 5: MCP schema validation
        if not failure and self.config.require_mcp_schema:
            failure = await self._validate_schema(provider)

        if failure:
            return self._stamp(failure, start_time)

        # All checks passed
        return ValidationReport(
            result=ValidationResult.PASSED,
            provider=provider,
            reason="All validation checks passed",
            duration_ms=(time.perf_counter() - start_time) * 1000,
        )

    def _validate_source(self, provider: DiscoveredProvider) -> Optional[ValidationReport]:
        """Validate source is trusted.

        Only providers whose ``source_type`` is "kubernetes" are checked;
        a missing namespace is treated as the empty string.

        Args:
            provider: Provider to validate

        Returns:
            ValidationReport if failed, None if passed
        """
        if provider.source_type != "kubernetes":
            return None

        namespace = provider.metadata.get("namespace", "")

        # The deny list takes precedence over the allow list.
        if namespace in self.config.denied_namespaces:
            return ValidationReport(
                result=ValidationResult.FAILED_SOURCE,
                provider=provider,
                reason=f"Namespace '{namespace}' is in denied list",
                details={
                    "namespace": namespace,
                    "denied_namespaces": list(self.config.denied_namespaces),
                },
            )

        # An empty allow list means "no allow-list restriction".
        if self.config.allowed_namespaces and namespace not in self.config.allowed_namespaces:
            return ValidationReport(
                result=ValidationResult.FAILED_SOURCE,
                provider=provider,
                reason=f"Namespace '{namespace}' is not in allowed list",
                details={
                    "namespace": namespace,
                    "allowed_namespaces": list(self.config.allowed_namespaces),
                },
            )

        return None

    def _check_rate_limit(self, provider: DiscoveredProvider) -> Optional[ValidationReport]:
        """Check registration rate limit.

        Attempts are counted per ``source_type`` over a sliding window.
        Accepted attempts are recorded; rejected ones are not.

        Args:
            provider: Provider to validate

        Returns:
            ValidationReport if rate exceeded, None if within limit
        """
        source = provider.source_type
        # Monotonic clock: wall-clock adjustments must not stretch or
        # shrink the window.
        now = time.monotonic()
        window = self._RATE_LIMIT_WINDOW_S

        # Keep only the attempts that are still inside the window.
        recent = [t for t in self._registration_counts.get(source, []) if now - t < window]
        self._registration_counts[source] = recent

        if len(recent) >= self.config.max_registration_rate:
            return ValidationReport(
                result=ValidationResult.FAILED_RATE_LIMIT,
                provider=provider,
                reason=f"Rate limit exceeded for source '{source}'",
                details={
                    "source": source,
                    "current_rate": len(recent),
                    "max_rate": self.config.max_registration_rate,
                    "window_seconds": window,
                },
            )

        # Record this registration attempt
        recent.append(now)
        return None

    def _check_provider_count(self, provider: DiscoveredProvider) -> Optional[ValidationReport]:
        """Check provider count per source.

        The count is maintained by ``record_registration`` and
        ``record_deregistration``; this method only reads it.

        Args:
            provider: Provider to validate

        Returns:
            ValidationReport if count exceeded, None if within limit
        """
        source = provider.source_type
        current_count = self._provider_counts.get(source, 0)

        if current_count < self.config.max_providers_per_source:
            return None

        return ValidationReport(
            result=ValidationResult.FAILED_RATE_LIMIT,
            provider=provider,
            reason=f"Max providers exceeded for source '{source}'",
            details={
                "source": source,
                "current_count": current_count,
                "max_count": self.config.max_providers_per_source,
            },
        )

    async def _validate_health(self, provider: DiscoveredProvider) -> Optional[ValidationReport]:
        """Validate provider health endpoint.

        Issues an HTTP GET against the provider's health path and requires
        a 200 response. Providers in non-HTTP modes are not probed, and the
        probe is skipped entirely when aiohttp is not installed.

        Args:
            provider: Provider to validate

        Returns:
            ValidationReport if health check failed, None if passed
        """
        # Only check HTTP-based providers
        if provider.mode not in self._HTTP_MODES:
            return None

        if not AIOHTTP_AVAILABLE:
            logger.debug(f"Skipping health check for {provider.name} (aiohttp not available)")
            return None

        host = provider.connection_info.get("host")
        port = provider.connection_info.get("port")

        if not host or not port:
            return ValidationReport(
                result=ValidationResult.FAILED_HEALTH,
                provider=provider,
                reason="Missing host or port in connection_info",
                details={"connection_info": provider.connection_info},
            )

        url = self._build_health_url(host, port, provider.connection_info.get("health_path"))

        try:
            timeout = aiohttp.ClientTimeout(total=self.config.health_check_timeout_s)

            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(url) as response:
                    if response.status != 200:
                        return ValidationReport(
                            result=ValidationResult.FAILED_HEALTH,
                            provider=provider,
                            reason=f"Health check returned status {response.status}",
                            details={"url": url, "status": response.status},
                        )

        except asyncio.TimeoutError:
            return ValidationReport(
                result=ValidationResult.FAILED_HEALTH,
                provider=provider,
                reason="Health check timed out",
                details={"url": url, "timeout": self.config.health_check_timeout_s},
            )
        except Exception as e:
            # Any other failure (connection refused, DNS, ...) is reported
            # as an unhealthy provider rather than propagated.
            return ValidationReport(
                result=ValidationResult.FAILED_HEALTH,
                provider=provider,
                reason=f"Health check failed: {e}",
                details={"url": url, "error": str(e)},
            )

        return None

    async def _validate_schema(self, provider: DiscoveredProvider) -> Optional[ValidationReport]:
        """Validate MCP tools schema.

        Currently a placeholder that always passes.

        Args:
            provider: Provider to validate

        Returns:
            ValidationReport if schema invalid, None if valid
        """
        # NOTE: MCP schema validation is intentionally deferred.
        # The provider's tools/list response should be validated against MCP spec,
        # but this requires network calls during registration which adds latency.
        # Schema validation can be done lazily on first tool invocation instead.
        logger.debug(f"Schema validation deferred for {provider.name}")
        return None

    def record_registration(self, provider: DiscoveredProvider) -> None:
        """Record successful registration for counting.

        Args:
            provider: Registered provider
        """
        source = provider.source_type
        self._provider_counts[source] = self._provider_counts.get(source, 0) + 1

    def record_deregistration(self, provider: DiscoveredProvider) -> None:
        """Record deregistration for counting.

        The count never drops below zero; unknown sources are ignored.

        Args:
            provider: Deregistered provider
        """
        source = provider.source_type
        if source in self._provider_counts:
            self._provider_counts[source] = max(0, self._provider_counts[source] - 1)

    def reset_rate_limits(self) -> None:
        """Reset all rate limit counters."""
        self._registration_counts.clear()

    def reset_provider_counts(self) -> None:
        """Reset all provider counts."""
        self._provider_counts.clear()
@@ -0,0 +1,50 @@
1
+ """Event handlers for reacting to domain events."""
2
+
3
+ from .alert_handler import Alert, AlertEventHandler, AlertSink, CallbackAlertSink, LogAlertSink
4
+ from .audit_handler import AuditEventHandler, AuditRecord, AuditStore, InMemoryAuditStore, LogAuditStore
5
+ from .logging_handler import LoggingEventHandler
6
+ from .metrics_handler import MetricsEventHandler
7
+ from .security_handler import (
8
+ CallbackSecuritySink,
9
+ CompositeSecuritySink,
10
+ get_security_handler,
11
+ InMemorySecuritySink,
12
+ LogSecuritySink,
13
+ reset_security_handler,
14
+ SecurityEvent,
15
+ SecurityEventHandler,
16
+ SecurityEventSink,
17
+ SecurityEventType,
18
+ SecuritySeverity,
19
+ )
20
+
21
+ __all__ = [
22
+ # Logging
23
+ "LoggingEventHandler",
24
+ # Metrics
25
+ "MetricsEventHandler",
26
+ # Alerts
27
+ "AlertEventHandler",
28
+ "Alert",
29
+ "AlertSink",
30
+ "LogAlertSink",
31
+ "CallbackAlertSink",
32
+ # Audit
33
+ "AuditEventHandler",
34
+ "AuditRecord",
35
+ "AuditStore",
36
+ "InMemoryAuditStore",
37
+ "LogAuditStore",
38
+ # Security
39
+ "SecurityEventHandler",
40
+ "SecurityEvent",
41
+ "SecurityEventType",
42
+ "SecuritySeverity",
43
+ "SecurityEventSink",
44
+ "LogSecuritySink",
45
+ "InMemorySecuritySink",
46
+ "CallbackSecuritySink",
47
+ "CompositeSecuritySink",
48
+ "get_security_handler",
49
+ "reset_security_handler",
50
+ ]
@@ -0,0 +1,191 @@
1
+ """Alert event handler for critical notifications."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from dataclasses import dataclass, field
5
+ from datetime import datetime, timezone
6
+ from typing import Any, Callable, Dict, List, Optional
7
+
8
+ from ...domain.events import DomainEvent, HealthCheckFailed, ProviderDegraded, ProviderStopped, ToolInvocationFailed
9
+ from ...logging_config import get_logger
10
+
11
+ logger = get_logger(__name__)
12
+
13
+
14
@dataclass
class Alert:
    """A single alert notification about a provider.

    ``timestamp`` defaults to the current UTC time at construction and
    ``details`` to a fresh empty dictionary.
    """

    level: str  # critical, warning, info
    message: str
    provider_id: str
    event_type: str
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the alert as a plain dictionary.

        The timestamp is rendered in ISO 8601 form; ``details`` is passed
        through as the same dictionary object.
        """
        iso_timestamp = self.timestamp.isoformat()
        return dict(
            level=self.level,
            message=self.message,
            provider_id=self.provider_id,
            event_type=self.event_type,
            timestamp=iso_timestamp,
            details=self.details,
        )
35
+
36
+
37
class AlertSink(ABC):
    """Interface that every alert destination implements."""

    @abstractmethod
    def send(self, alert: Alert) -> None:
        """Deliver ``alert`` to this destination."""
        return None
44
+
45
+
46
class LogAlertSink(AlertSink):
    """Alert destination that writes each alert to the module logger."""

    def send(self, alert: Alert) -> None:
        """Log the alert at the severity matching its level.

        "critical" and "warning" map to the logger methods of the same
        name; every other level is logged as info.
        """
        if alert.level == "critical":
            emit = logger.critical
        elif alert.level == "warning":
            emit = logger.warning
        else:
            emit = logger.info

        line = f"ALERT [{alert.level.upper()}] {alert.message} provider={alert.provider_id} event={alert.event_type}"
        emit(line)
60
+
61
+
62
class CallbackAlertSink(AlertSink):
    """Alert destination that forwards each alert to a callable."""

    def __init__(self, callback: Callable[[Alert], None]):
        """Remember the callable that will receive alerts."""
        self._callback = callback

    def send(self, alert: Alert) -> None:
        """Invoke the stored callable with ``alert``."""
        deliver = self._callback
        deliver(alert)
71
+
72
+
73
class AlertEventHandler:
    """
    Event handler that generates alerts for critical events.

    Monitors domain events and generates alerts when:
    - Provider is degraded
    - Provider stops unexpectedly
    - Tool invocation fails
    - Health check fails repeatedly

    Every alert is sent to all configured sinks and also kept in a bounded
    in-memory history exposed through ``alerts_sent``.
    """

    def __init__(
        self,
        sinks: Optional[List[AlertSink]] = None,
        degradation_threshold: int = 3,
        health_failure_threshold: int = 3,
        max_alert_history: Optional[int] = 1000,
    ):
        """
        Initialize the alert handler.

        Args:
            sinks: List of alert sinks to send alerts to; a LogAlertSink is
                used when none are given
            degradation_threshold: Consecutive failures at or above which a
                degraded-provider alert is critical instead of warning
            health_failure_threshold: Consecutive health failures at or
                above which a warning is generated
            max_alert_history: Maximum number of sent alerts retained for
                ``alerts_sent``; oldest entries are dropped first. None
                keeps every alert (unbounded).
        """
        self._sinks = sinks or [LogAlertSink()]
        self._degradation_threshold = degradation_threshold
        self._health_failure_threshold = health_failure_threshold
        # Negative caps make no sense; treat them as "retain nothing".
        self._max_alert_history = None if max_alert_history is None else max(0, max_alert_history)
        self._alerts_sent: List[Alert] = []

    def handle(self, event: DomainEvent) -> None:
        """Handle a domain event and potentially generate alerts.

        Events of any other type are ignored.
        """
        if isinstance(event, ProviderDegraded):
            self._handle_degraded(event)
        elif isinstance(event, ProviderStopped):
            self._handle_stopped(event)
        elif isinstance(event, ToolInvocationFailed):
            self._handle_tool_failed(event)
        elif isinstance(event, HealthCheckFailed):
            self._handle_health_failed(event)

    def _handle_degraded(self, event: ProviderDegraded) -> None:
        """Handle provider degraded event.

        Always alerts; severity depends on the degradation threshold.
        """
        level = "critical" if event.consecutive_failures >= self._degradation_threshold else "warning"

        alert = Alert(
            level=level,
            message=f"Provider degraded after {event.consecutive_failures} failures",
            provider_id=event.provider_id,
            event_type="ProviderDegraded",
            details={
                "consecutive_failures": event.consecutive_failures,
                "total_failures": event.total_failures,
                "reason": event.reason,
            },
        )
        self._send_alert(alert)

    def _handle_stopped(self, event: ProviderStopped) -> None:
        """Handle provider stopped event."""
        # Only alert for unexpected stops (not shutdown or idle)
        if event.reason in ("shutdown", "idle"):
            return

        alert = Alert(
            level="warning",
            message=f"Provider stopped unexpectedly: {event.reason}",
            provider_id=event.provider_id,
            event_type="ProviderStopped",
            details={"reason": event.reason},
        )
        self._send_alert(alert)

    def _handle_tool_failed(self, event: ToolInvocationFailed) -> None:
        """Handle tool invocation failed event (always a warning)."""
        alert = Alert(
            level="warning",
            message=f"Tool invocation failed: {event.tool_name}",
            provider_id=event.provider_id,
            event_type="ToolInvocationFailed",
            details={
                "tool_name": event.tool_name,
                "error_message": event.error_message,
                "error_type": event.error_type,
                "correlation_id": event.correlation_id,
            },
        )
        self._send_alert(alert)

    def _handle_health_failed(self, event: HealthCheckFailed) -> None:
        """Handle health check failed event.

        Failures below the health failure threshold produce no alert.
        """
        if event.consecutive_failures < self._health_failure_threshold:
            return

        alert = Alert(
            level="warning",
            message=f"Health check failed {event.consecutive_failures} times",
            provider_id=event.provider_id,
            event_type="HealthCheckFailed",
            details={
                "consecutive_failures": event.consecutive_failures,
                "error_message": event.error_message,
            },
        )
        self._send_alert(alert)

    def _send_alert(self, alert: Alert) -> None:
        """Send alert to all sinks.

        A failing sink is logged and does not prevent delivery to the
        remaining sinks.
        """
        self._alerts_sent.append(alert)

        # Trim the history so a long-running process does not accumulate
        # alerts without bound.
        cap = self._max_alert_history
        if cap is not None:
            overflow = len(self._alerts_sent) - cap
            if overflow > 0:
                del self._alerts_sent[:overflow]

        for sink in self._sinks:
            try:
                sink.send(alert)
            except Exception as e:
                logger.error(f"Failed to send alert to sink: {e}")

    @property
    def alerts_sent(self) -> List[Alert]:
        """Get list of alerts sent (for testing).

        Returns a copy of the retained history, oldest first.
        """
        return list(self._alerts_sent)

    def clear_alerts(self) -> None:
        """Clear sent alerts (for testing)."""
        self._alerts_sent.clear()