mcp-hangar 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. mcp_hangar/__init__.py +139 -0
  2. mcp_hangar/application/__init__.py +1 -0
  3. mcp_hangar/application/commands/__init__.py +67 -0
  4. mcp_hangar/application/commands/auth_commands.py +118 -0
  5. mcp_hangar/application/commands/auth_handlers.py +296 -0
  6. mcp_hangar/application/commands/commands.py +59 -0
  7. mcp_hangar/application/commands/handlers.py +189 -0
  8. mcp_hangar/application/discovery/__init__.py +21 -0
  9. mcp_hangar/application/discovery/discovery_metrics.py +283 -0
  10. mcp_hangar/application/discovery/discovery_orchestrator.py +497 -0
  11. mcp_hangar/application/discovery/lifecycle_manager.py +315 -0
  12. mcp_hangar/application/discovery/security_validator.py +414 -0
  13. mcp_hangar/application/event_handlers/__init__.py +50 -0
  14. mcp_hangar/application/event_handlers/alert_handler.py +191 -0
  15. mcp_hangar/application/event_handlers/audit_handler.py +203 -0
  16. mcp_hangar/application/event_handlers/knowledge_base_handler.py +120 -0
  17. mcp_hangar/application/event_handlers/logging_handler.py +69 -0
  18. mcp_hangar/application/event_handlers/metrics_handler.py +152 -0
  19. mcp_hangar/application/event_handlers/persistent_audit_store.py +217 -0
  20. mcp_hangar/application/event_handlers/security_handler.py +604 -0
  21. mcp_hangar/application/mcp/tooling.py +158 -0
  22. mcp_hangar/application/ports/__init__.py +9 -0
  23. mcp_hangar/application/ports/observability.py +237 -0
  24. mcp_hangar/application/queries/__init__.py +52 -0
  25. mcp_hangar/application/queries/auth_handlers.py +237 -0
  26. mcp_hangar/application/queries/auth_queries.py +118 -0
  27. mcp_hangar/application/queries/handlers.py +227 -0
  28. mcp_hangar/application/read_models/__init__.py +11 -0
  29. mcp_hangar/application/read_models/provider_views.py +139 -0
  30. mcp_hangar/application/sagas/__init__.py +11 -0
  31. mcp_hangar/application/sagas/group_rebalance_saga.py +137 -0
  32. mcp_hangar/application/sagas/provider_failover_saga.py +266 -0
  33. mcp_hangar/application/sagas/provider_recovery_saga.py +172 -0
  34. mcp_hangar/application/services/__init__.py +9 -0
  35. mcp_hangar/application/services/provider_service.py +208 -0
  36. mcp_hangar/application/services/traced_provider_service.py +211 -0
  37. mcp_hangar/bootstrap/runtime.py +328 -0
  38. mcp_hangar/context.py +178 -0
  39. mcp_hangar/domain/__init__.py +117 -0
  40. mcp_hangar/domain/contracts/__init__.py +57 -0
  41. mcp_hangar/domain/contracts/authentication.py +225 -0
  42. mcp_hangar/domain/contracts/authorization.py +229 -0
  43. mcp_hangar/domain/contracts/event_store.py +178 -0
  44. mcp_hangar/domain/contracts/metrics_publisher.py +59 -0
  45. mcp_hangar/domain/contracts/persistence.py +383 -0
  46. mcp_hangar/domain/contracts/provider_runtime.py +146 -0
  47. mcp_hangar/domain/discovery/__init__.py +20 -0
  48. mcp_hangar/domain/discovery/conflict_resolver.py +267 -0
  49. mcp_hangar/domain/discovery/discovered_provider.py +185 -0
  50. mcp_hangar/domain/discovery/discovery_service.py +412 -0
  51. mcp_hangar/domain/discovery/discovery_source.py +192 -0
  52. mcp_hangar/domain/events.py +433 -0
  53. mcp_hangar/domain/exceptions.py +525 -0
  54. mcp_hangar/domain/model/__init__.py +70 -0
  55. mcp_hangar/domain/model/aggregate.py +58 -0
  56. mcp_hangar/domain/model/circuit_breaker.py +152 -0
  57. mcp_hangar/domain/model/event_sourced_api_key.py +413 -0
  58. mcp_hangar/domain/model/event_sourced_provider.py +423 -0
  59. mcp_hangar/domain/model/event_sourced_role_assignment.py +268 -0
  60. mcp_hangar/domain/model/health_tracker.py +183 -0
  61. mcp_hangar/domain/model/load_balancer.py +185 -0
  62. mcp_hangar/domain/model/provider.py +810 -0
  63. mcp_hangar/domain/model/provider_group.py +656 -0
  64. mcp_hangar/domain/model/tool_catalog.py +105 -0
  65. mcp_hangar/domain/policies/__init__.py +19 -0
  66. mcp_hangar/domain/policies/provider_health.py +187 -0
  67. mcp_hangar/domain/repository.py +249 -0
  68. mcp_hangar/domain/security/__init__.py +85 -0
  69. mcp_hangar/domain/security/input_validator.py +710 -0
  70. mcp_hangar/domain/security/rate_limiter.py +387 -0
  71. mcp_hangar/domain/security/roles.py +237 -0
  72. mcp_hangar/domain/security/sanitizer.py +387 -0
  73. mcp_hangar/domain/security/secrets.py +501 -0
  74. mcp_hangar/domain/services/__init__.py +20 -0
  75. mcp_hangar/domain/services/audit_service.py +376 -0
  76. mcp_hangar/domain/services/image_builder.py +328 -0
  77. mcp_hangar/domain/services/provider_launcher.py +1046 -0
  78. mcp_hangar/domain/value_objects.py +1138 -0
  79. mcp_hangar/errors.py +818 -0
  80. mcp_hangar/fastmcp_server.py +1105 -0
  81. mcp_hangar/gc.py +134 -0
  82. mcp_hangar/infrastructure/__init__.py +79 -0
  83. mcp_hangar/infrastructure/async_executor.py +133 -0
  84. mcp_hangar/infrastructure/auth/__init__.py +37 -0
  85. mcp_hangar/infrastructure/auth/api_key_authenticator.py +388 -0
  86. mcp_hangar/infrastructure/auth/event_sourced_store.py +567 -0
  87. mcp_hangar/infrastructure/auth/jwt_authenticator.py +360 -0
  88. mcp_hangar/infrastructure/auth/middleware.py +340 -0
  89. mcp_hangar/infrastructure/auth/opa_authorizer.py +243 -0
  90. mcp_hangar/infrastructure/auth/postgres_store.py +659 -0
  91. mcp_hangar/infrastructure/auth/projections.py +366 -0
  92. mcp_hangar/infrastructure/auth/rate_limiter.py +311 -0
  93. mcp_hangar/infrastructure/auth/rbac_authorizer.py +323 -0
  94. mcp_hangar/infrastructure/auth/sqlite_store.py +624 -0
  95. mcp_hangar/infrastructure/command_bus.py +112 -0
  96. mcp_hangar/infrastructure/discovery/__init__.py +110 -0
  97. mcp_hangar/infrastructure/discovery/docker_source.py +289 -0
  98. mcp_hangar/infrastructure/discovery/entrypoint_source.py +249 -0
  99. mcp_hangar/infrastructure/discovery/filesystem_source.py +383 -0
  100. mcp_hangar/infrastructure/discovery/kubernetes_source.py +247 -0
  101. mcp_hangar/infrastructure/event_bus.py +260 -0
  102. mcp_hangar/infrastructure/event_sourced_repository.py +443 -0
  103. mcp_hangar/infrastructure/event_store.py +396 -0
  104. mcp_hangar/infrastructure/knowledge_base/__init__.py +259 -0
  105. mcp_hangar/infrastructure/knowledge_base/contracts.py +202 -0
  106. mcp_hangar/infrastructure/knowledge_base/memory.py +177 -0
  107. mcp_hangar/infrastructure/knowledge_base/postgres.py +545 -0
  108. mcp_hangar/infrastructure/knowledge_base/sqlite.py +513 -0
  109. mcp_hangar/infrastructure/metrics_publisher.py +36 -0
  110. mcp_hangar/infrastructure/observability/__init__.py +10 -0
  111. mcp_hangar/infrastructure/observability/langfuse_adapter.py +534 -0
  112. mcp_hangar/infrastructure/persistence/__init__.py +33 -0
  113. mcp_hangar/infrastructure/persistence/audit_repository.py +371 -0
  114. mcp_hangar/infrastructure/persistence/config_repository.py +398 -0
  115. mcp_hangar/infrastructure/persistence/database.py +333 -0
  116. mcp_hangar/infrastructure/persistence/database_common.py +330 -0
  117. mcp_hangar/infrastructure/persistence/event_serializer.py +280 -0
  118. mcp_hangar/infrastructure/persistence/event_upcaster.py +166 -0
  119. mcp_hangar/infrastructure/persistence/in_memory_event_store.py +150 -0
  120. mcp_hangar/infrastructure/persistence/recovery_service.py +312 -0
  121. mcp_hangar/infrastructure/persistence/sqlite_event_store.py +386 -0
  122. mcp_hangar/infrastructure/persistence/unit_of_work.py +409 -0
  123. mcp_hangar/infrastructure/persistence/upcasters/README.md +13 -0
  124. mcp_hangar/infrastructure/persistence/upcasters/__init__.py +7 -0
  125. mcp_hangar/infrastructure/query_bus.py +153 -0
  126. mcp_hangar/infrastructure/saga_manager.py +401 -0
  127. mcp_hangar/logging_config.py +209 -0
  128. mcp_hangar/metrics.py +1007 -0
  129. mcp_hangar/models.py +31 -0
  130. mcp_hangar/observability/__init__.py +54 -0
  131. mcp_hangar/observability/health.py +487 -0
  132. mcp_hangar/observability/metrics.py +319 -0
  133. mcp_hangar/observability/tracing.py +433 -0
  134. mcp_hangar/progress.py +542 -0
  135. mcp_hangar/retry.py +613 -0
  136. mcp_hangar/server/__init__.py +120 -0
  137. mcp_hangar/server/__main__.py +6 -0
  138. mcp_hangar/server/auth_bootstrap.py +340 -0
  139. mcp_hangar/server/auth_cli.py +335 -0
  140. mcp_hangar/server/auth_config.py +305 -0
  141. mcp_hangar/server/bootstrap.py +735 -0
  142. mcp_hangar/server/cli.py +161 -0
  143. mcp_hangar/server/config.py +224 -0
  144. mcp_hangar/server/context.py +215 -0
  145. mcp_hangar/server/http_auth_middleware.py +165 -0
  146. mcp_hangar/server/lifecycle.py +467 -0
  147. mcp_hangar/server/state.py +117 -0
  148. mcp_hangar/server/tools/__init__.py +16 -0
  149. mcp_hangar/server/tools/discovery.py +186 -0
  150. mcp_hangar/server/tools/groups.py +75 -0
  151. mcp_hangar/server/tools/health.py +301 -0
  152. mcp_hangar/server/tools/provider.py +939 -0
  153. mcp_hangar/server/tools/registry.py +320 -0
  154. mcp_hangar/server/validation.py +113 -0
  155. mcp_hangar/stdio_client.py +229 -0
  156. mcp_hangar-0.2.0.dist-info/METADATA +347 -0
  157. mcp_hangar-0.2.0.dist-info/RECORD +160 -0
  158. mcp_hangar-0.2.0.dist-info/WHEEL +4 -0
  159. mcp_hangar-0.2.0.dist-info/entry_points.txt +2 -0
  160. mcp_hangar-0.2.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,117 @@
1
+ """Server state management - BACKWARD COMPATIBILITY MODULE.
2
+
3
+ This module provides backward compatibility with code that relies on
4
+ global state variables. New code should use ApplicationContext from
5
+ context.py instead.
6
+
7
+ DEPRECATED: Direct use of PROVIDERS, COMMAND_BUS, etc. is deprecated.
8
+ Use get_context() from context.py for dependency injection.
9
+
10
+ Example migration:
11
+ # Old (deprecated):
12
+ from ..state import COMMAND_BUS, PROVIDERS
13
+ COMMAND_BUS.send(command)
14
+
15
+ # New (recommended):
16
+ from ..context import get_context
17
+ ctx = get_context()
18
+ ctx.command_bus.send(command)
19
+ """
20
+
21
+ from typing import Dict, Optional, TYPE_CHECKING
22
+
23
+ from ..application.discovery import DiscoveryOrchestrator
24
+ from ..application.sagas import GroupRebalanceSaga
25
+ from ..bootstrap.runtime import create_runtime
26
+ from ..domain.model import ProviderGroup
27
+ from ..logging_config import get_logger
28
+
29
+ if TYPE_CHECKING:
30
+ from ..domain.repository import IProviderRepository
31
+
32
+ logger = get_logger(__name__)
33
+
34
+
35
class ProviderDict:
    """Dictionary-like wrapper around provider repository for backward compatibility.

    Every operation is delegated to the wrapped repository; this class keeps
    no state of its own besides the repository reference.
    """

    def __init__(self, repository: "IProviderRepository"):
        """Wrap ``repository`` so it can be used with dict-style syntax."""
        self._repo = repository

    def __getitem__(self, key: str):
        """Return the provider stored under ``key``.

        Raises:
            KeyError: If the repository returns ``None`` for ``key``.
        """
        provider = self._repo.get(key)
        if provider is None:
            raise KeyError(key)
        return provider

    def __setitem__(self, key: str, value):
        """Store ``value`` under ``key`` via the repository's ``add``."""
        self._repo.add(key, value)

    def __contains__(self, key: str) -> bool:
        """Return whether the repository reports ``key`` as existing."""
        return self._repo.exists(key)

    def __iter__(self):
        """Iterate over provider IDs, as a plain dict would.

        Without this method Python falls back to calling ``__getitem__`` with
        0, 1, 2, ... which raises ``KeyError(0)`` instead of iterating.
        Assumes ``get_all_ids()`` returns an iterable -- confirm against the
        repository contract.
        """
        return iter(self.keys())

    def __len__(self) -> int:
        """Return the number of providers reported by the repository."""
        return self._repo.count()

    def get(self, key: str, default=None):
        """Return the provider stored under ``key``, or ``default`` if absent.

        Only ``None`` counts as "absent", matching ``__getitem__``. A stored
        object that happens to be falsy is returned as-is rather than being
        replaced by ``default``.
        """
        provider = self._repo.get(key)
        return default if provider is None else provider

    def items(self):
        """Return the ``items()`` view of the repository's ``get_all()`` mapping."""
        return self._repo.get_all().items()

    def keys(self):
        """Return all provider IDs as reported by the repository."""
        return self._repo.get_all_ids()

    def values(self):
        """Return the ``values()`` view of the repository's ``get_all()`` mapping."""
        return self._repo.get_all().values()
67
+
68
+
69
# Runtime wiring
# NOTE: create_runtime() runs at import time, so importing this module is
# enough to build the runtime object.
_RUNTIME = create_runtime()

# Convenience bindings
# Module-level aliases for attributes of the runtime created above. They are
# bound once at import and are not refreshed if the runtime's attributes are
# reassigned later.
PROVIDER_REPOSITORY = _RUNTIME.repository
EVENT_BUS = _RUNTIME.event_bus
COMMAND_BUS = _RUNTIME.command_bus
QUERY_BUS = _RUNTIME.query_bus
RATE_LIMIT_CONFIG = _RUNTIME.rate_limit_config
RATE_LIMITER = _RUNTIME.rate_limiter
INPUT_VALIDATOR = _RUNTIME.input_validator
SECURITY_HANDLER = _RUNTIME.security_handler

# Provider dict backed by repository
PROVIDERS = ProviderDict(PROVIDER_REPOSITORY)

# Provider Groups storage
# Starts empty; presumably keyed by group ID -- confirm against the code that
# populates it.
GROUPS: Dict[str, ProviderGroup] = {}

# Saga and discovery instances (initialized in main())
# Both start as None and are read/written through the get_*/set_* functions
# defined further down in this module.
_GROUP_REBALANCE_SAGA: Optional[GroupRebalanceSaga] = None
_DISCOVERY_ORCHESTRATOR: Optional[DiscoveryOrchestrator] = None
91
+
92
+
93
def get_runtime():
    """Return the module-level runtime object built when this module was imported."""
    return _RUNTIME
96
+
97
+
98
def set_discovery_orchestrator(orchestrator: Optional[DiscoveryOrchestrator]) -> None:
    """Store ``orchestrator`` as the module-wide discovery orchestrator.

    Args:
        orchestrator: Instance to store; ``None`` clears the stored value.
    """
    global _DISCOVERY_ORCHESTRATOR
    _DISCOVERY_ORCHESTRATOR = orchestrator
102
+
103
+
104
def get_discovery_orchestrator() -> Optional[DiscoveryOrchestrator]:
    """Return the stored discovery orchestrator, or ``None`` if none has been set."""
    return _DISCOVERY_ORCHESTRATOR
107
+
108
+
109
def set_group_rebalance_saga(saga: Optional[GroupRebalanceSaga]) -> None:
    """Store ``saga`` as the module-wide group rebalance saga.

    Args:
        saga: Instance to store; ``None`` clears the stored value.
    """
    global _GROUP_REBALANCE_SAGA
    _GROUP_REBALANCE_SAGA = saga
113
+
114
+
115
def get_group_rebalance_saga() -> Optional[GroupRebalanceSaga]:
    """Return the stored group rebalance saga, or ``None`` if none has been set."""
    return _GROUP_REBALANCE_SAGA
@@ -0,0 +1,16 @@
1
+ """MCP Tools modules."""
2
+
3
+ from .discovery import register_discovery_tools
4
+ from .groups import register_group_tools
5
+ from .health import register_health_tools
6
+ from .provider import register_provider_tools
7
+ from .registry import register_registry_tools, registry_list
8
+
9
# Public API of the tools package: the register_* function imported from each
# tool module, plus registry_list re-exported from .registry.
__all__ = [
    "register_registry_tools",
    "register_provider_tools",
    "register_health_tools",
    "register_discovery_tools",
    "register_group_tools",
    "registry_list",
]
@@ -0,0 +1,186 @@
1
+ """Discovery tools: discover, sources, approve, quarantine.
2
+
3
+ Uses ApplicationContext for dependency injection (DIP).
4
+ """
5
+
6
+ import asyncio
7
+ import concurrent.futures
8
+
9
+ from mcp.server.fastmcp import FastMCP
10
+
11
+ from ...application.mcp.tooling import key_global, mcp_tool_wrapper
12
+ from ..context import get_context
13
+ from ..validation import check_rate_limit, tool_error_hook, tool_error_mapper, validate_provider_id_input
14
+
15
+
16
def _run_coroutine_sync(coro, timeout):
    """Run ``coro`` to completion from synchronous code and return its result.

    The coroutine is executed exactly once. Exceptions raised by the
    coroutine, including ``RuntimeError``, propagate to the caller and never
    trigger a second execution.

    Args:
        coro: Coroutine object to execute.
        timeout: Seconds to wait for the result when the coroutine has to run
            in a worker thread. Not applied when no event loop is running in
            the calling thread, where the coroutine is driven directly.

    Returns:
        Whatever the coroutine returns.

    Raises:
        concurrent.futures.TimeoutError: If the worker thread does not finish
            within ``timeout`` seconds.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so the coroutine can be driven here.
        return asyncio.run(coro)

    # A loop is already running in this thread and cannot be re-entered, so
    # run the coroutine on its own loop in a worker thread and block on it.
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    try:
        return executor.submit(asyncio.run, coro).result(timeout=timeout)
    finally:
        # wait=False on purpose: a `with` block would join the worker on exit
        # and keep the caller blocked even after result() had timed out.
        executor.shutdown(wait=False)


def register_discovery_tools(mcp: FastMCP) -> None:
    """Register discovery tools with MCP server.

    Registers ``registry_discover``, ``registry_discovered``,
    ``registry_quarantine``, ``registry_approve`` and ``registry_sources``.
    Each tool reads the discovery orchestrator from the application context
    and returns an ``{"error": ...}`` dictionary when it is ``None``.

    Args:
        mcp: FastMCP server instance the tools are attached to.
    """

    @mcp.tool(name="registry_discover")
    @mcp_tool_wrapper(
        tool_name="registry_discover",
        rate_limit_key=key_global,
        check_rate_limit=lambda key: check_rate_limit("registry_discover"),
        validate=None,
        error_mapper=lambda exc: tool_error_mapper(exc),
        on_error=tool_error_hook,
    )
    def registry_discover() -> dict:
        """
        Trigger immediate discovery cycle across all configured sources.

        Returns:
            Dictionary with discovery statistics
        """
        orchestrator = get_context().discovery_orchestrator
        if orchestrator is None:
            return {"error": "Discovery not configured. Enable discovery in config.yaml"}

        return _run_coroutine_sync(orchestrator.trigger_discovery(), timeout=60)

    @mcp.tool(name="registry_discovered")
    @mcp_tool_wrapper(
        tool_name="registry_discovered",
        rate_limit_key=key_global,
        check_rate_limit=lambda key: check_rate_limit("registry_discovered"),
        validate=None,
        error_mapper=lambda exc: tool_error_mapper(exc),
        on_error=tool_error_hook,
    )
    def registry_discovered() -> dict:
        """
        List all discovered providers pending registration.

        Returns:
            Dictionary with 'pending' key containing list of pending providers
        """
        orchestrator = get_context().discovery_orchestrator
        if orchestrator is None:
            return {"error": "Discovery not configured. Enable discovery in config.yaml"}

        pending = orchestrator.get_pending_providers()
        return {
            "pending": [
                {
                    "name": p.name,
                    "source": p.source_type,
                    "mode": p.mode,
                    "discovered_at": p.discovered_at.isoformat(),
                    "fingerprint": p.fingerprint,
                }
                for p in pending
            ]
        }

    @mcp.tool(name="registry_quarantine")
    @mcp_tool_wrapper(
        tool_name="registry_quarantine",
        rate_limit_key=key_global,
        check_rate_limit=lambda key: check_rate_limit("registry_quarantine"),
        validate=None,
        error_mapper=lambda exc: tool_error_mapper(exc),
        on_error=tool_error_hook,
    )
    def registry_quarantine() -> dict:
        """
        List quarantined providers with failure reasons.

        Returns:
            Dictionary with 'quarantined' key containing list of quarantined providers
        """
        orchestrator = get_context().discovery_orchestrator
        if orchestrator is None:
            return {"error": "Discovery not configured. Enable discovery in config.yaml"}

        quarantined = orchestrator.get_quarantined()
        return {
            "quarantined": [
                {
                    "name": name,
                    "source": data["provider"]["source_type"],
                    "reason": data["reason"],
                    "quarantine_time": data["quarantine_time"],
                }
                for name, data in quarantined.items()
            ]
        }

    @mcp.tool(name="registry_approve")
    @mcp_tool_wrapper(
        tool_name="registry_approve",
        rate_limit_key=lambda provider: f"registry_approve:{provider}",
        check_rate_limit=check_rate_limit,
        validate=validate_provider_id_input,
        error_mapper=lambda exc: tool_error_mapper(exc),
        on_error=lambda exc, ctx: tool_error_hook(exc, ctx),
    )
    def registry_approve(provider: str) -> dict:
        """
        Approve a quarantined provider for registration.

        Args:
            provider: Name of the quarantined provider to approve

        Returns:
            Dictionary with approval result
        """
        orchestrator = get_context().discovery_orchestrator
        if orchestrator is None:
            return {"error": "Discovery not configured. Enable discovery in config.yaml"}

        return _run_coroutine_sync(orchestrator.approve_provider(provider), timeout=60)

    @mcp.tool(name="registry_sources")
    @mcp_tool_wrapper(
        tool_name="registry_sources",
        rate_limit_key=key_global,
        check_rate_limit=lambda key: check_rate_limit("registry_sources"),
        validate=None,
        error_mapper=lambda exc: tool_error_mapper(exc),
        on_error=tool_error_hook,
    )
    def registry_sources() -> dict:
        """
        List configured discovery sources with health status.

        Returns:
            Dictionary with 'sources' key containing list of source status
        """
        orchestrator = get_context().discovery_orchestrator
        if orchestrator is None:
            return {"error": "Discovery not configured. Enable discovery in config.yaml"}

        sources = _run_coroutine_sync(orchestrator.get_sources_status(), timeout=30)
        return {"sources": sources}
@@ -0,0 +1,75 @@
1
+ """Group management tools.
2
+
3
+ Uses ApplicationContext for dependency injection (DIP).
4
+ """
5
+
6
+ from mcp.server.fastmcp import FastMCP
7
+
8
+ from ...application.mcp.tooling import key_global, mcp_tool_wrapper
9
+ from ..context import get_context
10
+ from ..validation import check_rate_limit, tool_error_hook, tool_error_mapper, validate_provider_id_input
11
+
12
+
13
def register_group_tools(mcp: FastMCP) -> None:
    """Attach the provider-group tools to ``mcp``.

    Registers ``registry_group_list`` and ``registry_group_rebalance``. Both
    obtain their data from the application context returned by
    ``get_context()``.

    Args:
        mcp: FastMCP server instance the tools are attached to.
    """

    @mcp.tool(name="registry_group_list")
    @mcp_tool_wrapper(
        tool_name="registry_group_list",
        rate_limit_key=key_global,
        check_rate_limit=lambda key: check_rate_limit("registry_group_list"),
        validate=None,
        error_mapper=lambda exc: tool_error_mapper(exc),
        on_error=tool_error_hook,
    )
    def registry_group_list() -> dict:
        """
        List all provider groups with detailed status.

        This is a QUERY operation - read only.

        Returns:
            Dictionary with 'groups' key containing list of group info
        """
        statuses = []
        for entry in get_context().groups.values():
            statuses.append(entry.to_status_dict())
        return {"groups": statuses}

    @mcp.tool(name="registry_group_rebalance")
    @mcp_tool_wrapper(
        tool_name="registry_group_rebalance",
        rate_limit_key=lambda group: f"registry_group_rebalance:{group}",
        check_rate_limit=check_rate_limit,
        validate=validate_provider_id_input,
        error_mapper=lambda exc: tool_error_mapper(exc),
        on_error=lambda exc, ctx_dict: tool_error_hook(exc, ctx_dict),
    )
    def registry_group_rebalance(group: str) -> dict:
        """
        Manually trigger rebalancing for a group.

        This is a COMMAND operation - it changes state.

        Args:
            group: Group ID to rebalance

        Returns:
            Dictionary with group status after rebalancing

        Raises:
            ValueError: If group ID is unknown
        """
        ctx = get_context()

        if ctx.group_exists(group):
            target = ctx.get_group(group)
            target.rebalance()

            # Built key by key so attributes are read in a fixed order after
            # the rebalance call.
            summary = {"group_id": group}
            summary["state"] = target.state.value
            summary["healthy_count"] = target.healthy_count
            summary["total_members"] = target.total_count
            summary["members_in_rotation"] = [
                member.id for member in target.members if member.in_rotation
            ]
            return summary

        raise ValueError(f"unknown_group: {group}")
@@ -0,0 +1,301 @@
1
+ """Health and metrics tools.
2
+
3
+ Uses ApplicationContext for dependency injection (DIP).
4
+ All operations are QUERY operations - read only, no side effects.
5
+ """
6
+
7
+ from typing import Any, Dict, List
8
+
9
+ from mcp.server.fastmcp import FastMCP
10
+
11
+ from ... import metrics as m
12
+ from ...application.mcp.tooling import key_global, mcp_tool_wrapper
13
+ from ...logging_config import get_logger
14
+ from ..context import get_context
15
+ from ..validation import check_rate_limit, tool_error_hook, tool_error_mapper
16
+
17
+ logger = get_logger(__name__)
18
+
19
+ # =============================================================================
20
+ # Metrics Processing Helpers
21
+ # =============================================================================
22
+
23
+
24
+ def _collect_samples_from_collector(collector: Any) -> List[Any]:
25
+ """Extract metric samples from a Prometheus collector.
26
+
27
+ Args:
28
+ collector: Prometheus metric collector instance.
29
+
30
+ Returns:
31
+ List of metric samples extracted from the collector.
32
+ """
33
+ if not hasattr(collector, "collect"):
34
+ return []
35
+
36
+ collected = collector.collect()
37
+ if isinstance(collected, list):
38
+ return collected
39
+
40
+ if isinstance(collected, tuple):
41
+ samples = []
42
+ for item in collected:
43
+ if isinstance(item, list):
44
+ samples.extend(item)
45
+ elif hasattr(item, "labels"):
46
+ samples.append(item)
47
+ return samples
48
+
49
+ return []
50
+
51
+
52
+ def _process_tool_calls_metric(
53
+ name: str, labels: Dict[str, str], value: float, tool_calls: Dict[str, Dict[str, int]]
54
+ ) -> None:
55
+ """Process tool_calls metric sample and update aggregation dict.
56
+
57
+ Args:
58
+ name: Metric name.
59
+ labels: Metric labels dict.
60
+ value: Metric value.
61
+ tool_calls: Dict to accumulate tool call counts.
62
+ """
63
+ if "tool_calls" not in name:
64
+ return
65
+
66
+ provider = labels.get("provider", "unknown")
67
+ tool = labels.get("tool", "unknown")
68
+ key = f"{provider}.{tool}"
69
+
70
+ if key not in tool_calls:
71
+ tool_calls[key] = {"count": 0, "errors": 0}
72
+
73
+ if "error" in name:
74
+ tool_calls[key]["errors"] = int(value)
75
+ else:
76
+ tool_calls[key]["count"] = int(value)
77
+
78
+
79
+ def _process_invocations_metric(
80
+ name: str, labels: Dict[str, str], value: float, providers: Dict[str, Dict[str, Any]]
81
+ ) -> None:
82
+ """Process invocations metric sample and update provider stats.
83
+
84
+ Args:
85
+ name: Metric name.
86
+ labels: Metric labels dict.
87
+ value: Metric value.
88
+ providers: Dict to accumulate provider invocation counts.
89
+ """
90
+ if "invocations" not in name or "provider" not in labels:
91
+ return
92
+
93
+ provider = labels.get("provider")
94
+ if provider and provider in providers:
95
+ providers[provider]["invocations"] = int(value)
96
+
97
+
98
+ def _process_discovery_metric(
99
+ name: str, labels: Dict[str, str], value: float, discovery: Dict[str, Dict[str, Any]]
100
+ ) -> None:
101
+ """Process discovery metric sample and update discovery stats.
102
+
103
+ Args:
104
+ name: Metric name.
105
+ labels: Metric labels dict.
106
+ value: Metric value.
107
+ discovery: Dict to accumulate discovery statistics.
108
+ """
109
+ if "discovery" not in name:
110
+ return
111
+
112
+ source = labels.get("source_type", labels.get("source", "unknown"))
113
+ if not source:
114
+ return
115
+
116
+ if source not in discovery:
117
+ discovery[source] = {}
118
+
119
+ if "cycle" in name:
120
+ discovery[source]["cycles"] = int(value)
121
+ elif "providers" in name:
122
+ status = labels.get("status", "total")
123
+ discovery[source][f"providers_{status}"] = int(value)
124
+
125
+
126
+ def _process_error_metric(name: str, labels: Dict[str, str], value: float, errors: Dict[str, int]) -> None:
127
+ """Process error metric sample and update error counts.
128
+
129
+ Args:
130
+ name: Metric name.
131
+ labels: Metric labels dict.
132
+ value: Metric value.
133
+ errors: Dict to accumulate error counts by type.
134
+ """
135
+ if "error" not in name.lower():
136
+ return
137
+
138
+ error_type = labels.get("error_type", labels.get("type", name))
139
+ errors[error_type] = errors.get(error_type, 0) + int(value)
140
+
141
+
142
def _process_metric_sample(sample: Any, result: Dict[str, Any]) -> None:
    """Feed one metric sample to every metric processor.

    Objects lacking a ``labels`` or ``value`` attribute are ignored. Each
    processor decides from the metric name whether the sample concerns it,
    so a single sample may update more than one section of ``result``.

    Args:
        sample: Metric sample with ``labels`` and ``value`` attributes and an
            optional ``name`` (treated as ``""`` when absent).
        result: Result dict with ``tool_calls``, ``providers``, ``discovery``
            and ``errors`` sections, updated in place.
    """
    if not (hasattr(sample, "labels") and hasattr(sample, "value")):
        return

    sample_labels = sample.labels or {}
    sample_value = sample.value
    metric_name = getattr(sample, "name", "")

    # Fixed processor order; each section is looked up just before its call.
    routing = (
        (_process_tool_calls_metric, "tool_calls"),
        (_process_invocations_metric, "providers"),
        (_process_discovery_metric, "discovery"),
        (_process_error_metric, "errors"),
    )
    for processor, section in routing:
        processor(metric_name, sample_labels, sample_value, result[section])
162
+
163
+
164
def register_health_tools(mcp: FastMCP) -> None:
    """Attach the ``registry_health`` and ``registry_metrics`` tools to ``mcp``.

    Both tools read providers and groups from the application context
    returned by ``get_context()``; ``registry_metrics`` additionally reads the
    metrics registry of the ``metrics`` module.

    Args:
        mcp: FastMCP server instance the tools are attached to.
    """

    @mcp.tool(name="registry_health")
    @mcp_tool_wrapper(
        tool_name="registry_health",
        rate_limit_key=key_global,
        check_rate_limit=lambda key: check_rate_limit("registry_health"),
        validate=None,
        error_mapper=lambda exc: tool_error_mapper(exc),
        on_error=tool_error_hook,
    )
    def registry_health() -> dict:
        """
        Get registry health status including security metrics.

        This is a QUERY operation - read only.

        Returns:
            Dictionary with health information
        """
        ctx = get_context()
        limiter_stats = ctx.rate_limiter.get_stats()

        # Tally providers per state.
        provider_list = list(ctx.repository.get_all().values())
        providers_by_state = {}
        for entry in provider_list:
            label = str(entry.state)
            providers_by_state[label] = providers_by_state.get(label, 0) + 1

        # Tally groups per state and sum their member counts in one pass.
        groups_by_state = {}
        member_total = 0
        member_healthy = 0
        for grp in ctx.groups.values():
            label = grp.state.value
            groups_by_state[label] = groups_by_state.get(label, 0) + 1
            member_total += grp.total_count
            member_healthy += grp.healthy_count

        provider_section = {
            "total": len(provider_list),
            "by_state": providers_by_state,
        }
        group_section = {
            "total": len(ctx.groups),
            "by_state": groups_by_state,
            "total_members": member_total,
            "healthy_members": member_healthy,
        }
        return {
            "status": "healthy",
            "providers": provider_section,
            "groups": group_section,
            "security": {
                "rate_limiting": limiter_stats,
            },
        }

    @mcp.tool(name="registry_metrics")
    @mcp_tool_wrapper(
        tool_name="registry_metrics",
        rate_limit_key=key_global,
        check_rate_limit=lambda key: check_rate_limit("registry_metrics"),
        validate=None,
        error_mapper=lambda exc: tool_error_mapper(exc),
        on_error=tool_error_hook,
    )
    def registry_metrics(format: str = "json") -> dict:
        """
        Get detailed metrics for all providers, groups, and system components.

        This is a QUERY operation - read only.

        Args:
            format: Output format - 'json' (structured) or 'prometheus' (raw text)

        Returns:
            Dictionary with comprehensive metrics
        """
        ctx = get_context()

        if format == "prometheus":
            return {"metrics": m.REGISTRY.render()}

        result: Dict[str, Any] = {
            "providers": {},
            "groups": {},
            "tool_calls": {},
            "discovery": {},
            "errors": {},
            "performance": {},
        }

        # Seed one entry per provider; counters start at zero and the
        # invocation count may be filled in from the registry samples below.
        for provider in ctx.repository.get_all().values():
            provider_key = provider.provider_id
            stats = {"state": str(provider.state)}
            if hasattr(provider, "_mode"):
                stats["mode"] = provider._mode.value
            else:
                stats["mode"] = "unknown"
            stats["tools_count"] = len(provider.tools) if provider.tools else 0
            stats["invocations"] = 0
            stats["errors"] = 0
            stats["avg_latency_ms"] = 0
            result["providers"][provider_key] = stats

        # Fold every registry sample into the result.
        for collector_name, collector in m.REGISTRY._collectors.items():
            try:
                for sample in _collect_samples_from_collector(collector):
                    # Samples without a name inherit the collector's name so
                    # the processors can route them.
                    if not hasattr(sample, "name"):
                        sample.name = collector_name
                    _process_metric_sample(sample, result)
            except (AttributeError, TypeError, ValueError) as exc:
                # A malformed collector is logged and skipped; the remaining
                # collectors are still processed.
                logger.debug("metrics_collector_error", collector=collector_name, error=str(exc))

        # Per-group snapshot.
        for grp in ctx.groups.values():
            result["groups"][grp.name] = {
                "state": grp.state.value,
                "strategy": grp.strategy,
                "total_members": grp.total_count,
                "healthy_members": grp.healthy_count,
            }

        # Totals derived from the sections filled in above.
        call_total = sum(tc.get("count", 0) for tc in result["tool_calls"].values())
        result["summary"] = {
            "total_providers": len(result["providers"]),
            "total_groups": len(result["groups"]),
            "total_tool_calls": call_total,
            "total_errors": sum(result["errors"].values()),
        }

        return result