mcp-hangar 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. mcp_hangar/__init__.py +139 -0
  2. mcp_hangar/application/__init__.py +1 -0
  3. mcp_hangar/application/commands/__init__.py +67 -0
  4. mcp_hangar/application/commands/auth_commands.py +118 -0
  5. mcp_hangar/application/commands/auth_handlers.py +296 -0
  6. mcp_hangar/application/commands/commands.py +59 -0
  7. mcp_hangar/application/commands/handlers.py +189 -0
  8. mcp_hangar/application/discovery/__init__.py +21 -0
  9. mcp_hangar/application/discovery/discovery_metrics.py +283 -0
  10. mcp_hangar/application/discovery/discovery_orchestrator.py +497 -0
  11. mcp_hangar/application/discovery/lifecycle_manager.py +315 -0
  12. mcp_hangar/application/discovery/security_validator.py +414 -0
  13. mcp_hangar/application/event_handlers/__init__.py +50 -0
  14. mcp_hangar/application/event_handlers/alert_handler.py +191 -0
  15. mcp_hangar/application/event_handlers/audit_handler.py +203 -0
  16. mcp_hangar/application/event_handlers/knowledge_base_handler.py +120 -0
  17. mcp_hangar/application/event_handlers/logging_handler.py +69 -0
  18. mcp_hangar/application/event_handlers/metrics_handler.py +152 -0
  19. mcp_hangar/application/event_handlers/persistent_audit_store.py +217 -0
  20. mcp_hangar/application/event_handlers/security_handler.py +604 -0
  21. mcp_hangar/application/mcp/tooling.py +158 -0
  22. mcp_hangar/application/ports/__init__.py +9 -0
  23. mcp_hangar/application/ports/observability.py +237 -0
  24. mcp_hangar/application/queries/__init__.py +52 -0
  25. mcp_hangar/application/queries/auth_handlers.py +237 -0
  26. mcp_hangar/application/queries/auth_queries.py +118 -0
  27. mcp_hangar/application/queries/handlers.py +227 -0
  28. mcp_hangar/application/read_models/__init__.py +11 -0
  29. mcp_hangar/application/read_models/provider_views.py +139 -0
  30. mcp_hangar/application/sagas/__init__.py +11 -0
  31. mcp_hangar/application/sagas/group_rebalance_saga.py +137 -0
  32. mcp_hangar/application/sagas/provider_failover_saga.py +266 -0
  33. mcp_hangar/application/sagas/provider_recovery_saga.py +172 -0
  34. mcp_hangar/application/services/__init__.py +9 -0
  35. mcp_hangar/application/services/provider_service.py +208 -0
  36. mcp_hangar/application/services/traced_provider_service.py +211 -0
  37. mcp_hangar/bootstrap/runtime.py +328 -0
  38. mcp_hangar/context.py +178 -0
  39. mcp_hangar/domain/__init__.py +117 -0
  40. mcp_hangar/domain/contracts/__init__.py +57 -0
  41. mcp_hangar/domain/contracts/authentication.py +225 -0
  42. mcp_hangar/domain/contracts/authorization.py +229 -0
  43. mcp_hangar/domain/contracts/event_store.py +178 -0
  44. mcp_hangar/domain/contracts/metrics_publisher.py +59 -0
  45. mcp_hangar/domain/contracts/persistence.py +383 -0
  46. mcp_hangar/domain/contracts/provider_runtime.py +146 -0
  47. mcp_hangar/domain/discovery/__init__.py +20 -0
  48. mcp_hangar/domain/discovery/conflict_resolver.py +267 -0
  49. mcp_hangar/domain/discovery/discovered_provider.py +185 -0
  50. mcp_hangar/domain/discovery/discovery_service.py +412 -0
  51. mcp_hangar/domain/discovery/discovery_source.py +192 -0
  52. mcp_hangar/domain/events.py +433 -0
  53. mcp_hangar/domain/exceptions.py +525 -0
  54. mcp_hangar/domain/model/__init__.py +70 -0
  55. mcp_hangar/domain/model/aggregate.py +58 -0
  56. mcp_hangar/domain/model/circuit_breaker.py +152 -0
  57. mcp_hangar/domain/model/event_sourced_api_key.py +413 -0
  58. mcp_hangar/domain/model/event_sourced_provider.py +423 -0
  59. mcp_hangar/domain/model/event_sourced_role_assignment.py +268 -0
  60. mcp_hangar/domain/model/health_tracker.py +183 -0
  61. mcp_hangar/domain/model/load_balancer.py +185 -0
  62. mcp_hangar/domain/model/provider.py +810 -0
  63. mcp_hangar/domain/model/provider_group.py +656 -0
  64. mcp_hangar/domain/model/tool_catalog.py +105 -0
  65. mcp_hangar/domain/policies/__init__.py +19 -0
  66. mcp_hangar/domain/policies/provider_health.py +187 -0
  67. mcp_hangar/domain/repository.py +249 -0
  68. mcp_hangar/domain/security/__init__.py +85 -0
  69. mcp_hangar/domain/security/input_validator.py +710 -0
  70. mcp_hangar/domain/security/rate_limiter.py +387 -0
  71. mcp_hangar/domain/security/roles.py +237 -0
  72. mcp_hangar/domain/security/sanitizer.py +387 -0
  73. mcp_hangar/domain/security/secrets.py +501 -0
  74. mcp_hangar/domain/services/__init__.py +20 -0
  75. mcp_hangar/domain/services/audit_service.py +376 -0
  76. mcp_hangar/domain/services/image_builder.py +328 -0
  77. mcp_hangar/domain/services/provider_launcher.py +1046 -0
  78. mcp_hangar/domain/value_objects.py +1138 -0
  79. mcp_hangar/errors.py +818 -0
  80. mcp_hangar/fastmcp_server.py +1105 -0
  81. mcp_hangar/gc.py +134 -0
  82. mcp_hangar/infrastructure/__init__.py +79 -0
  83. mcp_hangar/infrastructure/async_executor.py +133 -0
  84. mcp_hangar/infrastructure/auth/__init__.py +37 -0
  85. mcp_hangar/infrastructure/auth/api_key_authenticator.py +388 -0
  86. mcp_hangar/infrastructure/auth/event_sourced_store.py +567 -0
  87. mcp_hangar/infrastructure/auth/jwt_authenticator.py +360 -0
  88. mcp_hangar/infrastructure/auth/middleware.py +340 -0
  89. mcp_hangar/infrastructure/auth/opa_authorizer.py +243 -0
  90. mcp_hangar/infrastructure/auth/postgres_store.py +659 -0
  91. mcp_hangar/infrastructure/auth/projections.py +366 -0
  92. mcp_hangar/infrastructure/auth/rate_limiter.py +311 -0
  93. mcp_hangar/infrastructure/auth/rbac_authorizer.py +323 -0
  94. mcp_hangar/infrastructure/auth/sqlite_store.py +624 -0
  95. mcp_hangar/infrastructure/command_bus.py +112 -0
  96. mcp_hangar/infrastructure/discovery/__init__.py +110 -0
  97. mcp_hangar/infrastructure/discovery/docker_source.py +289 -0
  98. mcp_hangar/infrastructure/discovery/entrypoint_source.py +249 -0
  99. mcp_hangar/infrastructure/discovery/filesystem_source.py +383 -0
  100. mcp_hangar/infrastructure/discovery/kubernetes_source.py +247 -0
  101. mcp_hangar/infrastructure/event_bus.py +260 -0
  102. mcp_hangar/infrastructure/event_sourced_repository.py +443 -0
  103. mcp_hangar/infrastructure/event_store.py +396 -0
  104. mcp_hangar/infrastructure/knowledge_base/__init__.py +259 -0
  105. mcp_hangar/infrastructure/knowledge_base/contracts.py +202 -0
  106. mcp_hangar/infrastructure/knowledge_base/memory.py +177 -0
  107. mcp_hangar/infrastructure/knowledge_base/postgres.py +545 -0
  108. mcp_hangar/infrastructure/knowledge_base/sqlite.py +513 -0
  109. mcp_hangar/infrastructure/metrics_publisher.py +36 -0
  110. mcp_hangar/infrastructure/observability/__init__.py +10 -0
  111. mcp_hangar/infrastructure/observability/langfuse_adapter.py +534 -0
  112. mcp_hangar/infrastructure/persistence/__init__.py +33 -0
  113. mcp_hangar/infrastructure/persistence/audit_repository.py +371 -0
  114. mcp_hangar/infrastructure/persistence/config_repository.py +398 -0
  115. mcp_hangar/infrastructure/persistence/database.py +333 -0
  116. mcp_hangar/infrastructure/persistence/database_common.py +330 -0
  117. mcp_hangar/infrastructure/persistence/event_serializer.py +280 -0
  118. mcp_hangar/infrastructure/persistence/event_upcaster.py +166 -0
  119. mcp_hangar/infrastructure/persistence/in_memory_event_store.py +150 -0
  120. mcp_hangar/infrastructure/persistence/recovery_service.py +312 -0
  121. mcp_hangar/infrastructure/persistence/sqlite_event_store.py +386 -0
  122. mcp_hangar/infrastructure/persistence/unit_of_work.py +409 -0
  123. mcp_hangar/infrastructure/persistence/upcasters/README.md +13 -0
  124. mcp_hangar/infrastructure/persistence/upcasters/__init__.py +7 -0
  125. mcp_hangar/infrastructure/query_bus.py +153 -0
  126. mcp_hangar/infrastructure/saga_manager.py +401 -0
  127. mcp_hangar/logging_config.py +209 -0
  128. mcp_hangar/metrics.py +1007 -0
  129. mcp_hangar/models.py +31 -0
  130. mcp_hangar/observability/__init__.py +54 -0
  131. mcp_hangar/observability/health.py +487 -0
  132. mcp_hangar/observability/metrics.py +319 -0
  133. mcp_hangar/observability/tracing.py +433 -0
  134. mcp_hangar/progress.py +542 -0
  135. mcp_hangar/retry.py +613 -0
  136. mcp_hangar/server/__init__.py +120 -0
  137. mcp_hangar/server/__main__.py +6 -0
  138. mcp_hangar/server/auth_bootstrap.py +340 -0
  139. mcp_hangar/server/auth_cli.py +335 -0
  140. mcp_hangar/server/auth_config.py +305 -0
  141. mcp_hangar/server/bootstrap.py +735 -0
  142. mcp_hangar/server/cli.py +161 -0
  143. mcp_hangar/server/config.py +224 -0
  144. mcp_hangar/server/context.py +215 -0
  145. mcp_hangar/server/http_auth_middleware.py +165 -0
  146. mcp_hangar/server/lifecycle.py +467 -0
  147. mcp_hangar/server/state.py +117 -0
  148. mcp_hangar/server/tools/__init__.py +16 -0
  149. mcp_hangar/server/tools/discovery.py +186 -0
  150. mcp_hangar/server/tools/groups.py +75 -0
  151. mcp_hangar/server/tools/health.py +301 -0
  152. mcp_hangar/server/tools/provider.py +939 -0
  153. mcp_hangar/server/tools/registry.py +320 -0
  154. mcp_hangar/server/validation.py +113 -0
  155. mcp_hangar/stdio_client.py +229 -0
  156. mcp_hangar-0.2.0.dist-info/METADATA +347 -0
  157. mcp_hangar-0.2.0.dist-info/RECORD +160 -0
  158. mcp_hangar-0.2.0.dist-info/WHEEL +4 -0
  159. mcp_hangar-0.2.0.dist-info/entry_points.txt +2 -0
  160. mcp_hangar-0.2.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,320 @@
1
+ """Registry management tools: list, start, stop, status.
2
+
3
+ Uses ApplicationContext for dependency injection (DIP).
4
+ Separates commands (write) from queries (read) following CQRS.
5
+ """
6
+
7
+ import time
8
+ from typing import Optional
9
+
10
+ from mcp.server.fastmcp import FastMCP
11
+
12
+ from ...application.commands import StartProviderCommand, StopProviderCommand
13
+ from ...application.mcp.tooling import key_global, mcp_tool_wrapper
14
+ from ...infrastructure.query_bus import ListProvidersQuery
15
+ from ..context import get_context
16
+ from ..validation import check_rate_limit, tool_error_hook, tool_error_mapper, validate_provider_id_input
17
+
18
# Wall-clock timestamp captured once, when this module is first imported.
# registry_status subtracts it from time.time() to report server uptime.
_server_start_time: float = time.time()
20
+
21
+
22
def registry_list(state_filter: Optional[str] = None) -> dict:
    """
    Return every known provider and provider group together with its status.

    Pure QUERY: providers are fetched through the CQRS query bus and groups
    are read straight from the application context; nothing is modified.

    Args:
        state_filter: When given (and non-empty), only providers/groups whose
            state equals this value are returned (cold, ready, degraded, dead).

    Returns:
        Dictionary with a 'providers' list and a 'groups' list.
    """
    app_ctx = get_context()

    # Providers: the filter is applied by the query handler.
    provider_summaries = app_ctx.query_bus.execute(ListProvidersQuery(state_filter=state_filter))

    # Groups: the filter is applied here against each group's status dict.
    matching_groups = []
    for grp in app_ctx.groups.values():
        status = grp.to_status_dict()
        if not state_filter or status.get("state") == state_filter:
            matching_groups.append(status)

    provider_dicts = [summary.to_dict() for summary in provider_summaries]
    return {"providers": provider_dicts, "groups": matching_groups}
52
+
53
+
54
def register_registry_tools(mcp: FastMCP) -> None:
    """Register registry management tools with MCP server.

    Four tools are attached to ``mcp``: ``registry_list``, ``registry_start``,
    ``registry_stop`` and ``registry_status``. Each one is wrapped with
    ``mcp_tool_wrapper``, which is handed the rate-limit, validation,
    error-mapping and error-hook callbacks for that tool.

    Args:
        mcp: FastMCP server instance the tools are registered on.
    """

    # registry_list: thin adapter around the module-level registry_list().
    # The rate-limit callback ignores the key it is given and always consumes
    # the fixed "registry_list" bucket.
    @mcp.tool(name="registry_list")
    @mcp_tool_wrapper(
        tool_name="registry_list",
        rate_limit_key=key_global,
        check_rate_limit=lambda key: check_rate_limit("registry_list"),
        validate=None,
        error_mapper=lambda exc: tool_error_mapper(exc),
        on_error=tool_error_hook,
    )
    def _registry_list(state_filter: Optional[str] = None) -> dict:
        return registry_list(state_filter)

    # registry_start: rate-limited per provider id ("registry_start:<id>") and
    # validated with validate_provider_id_input.
    @mcp.tool(name="registry_start")
    @mcp_tool_wrapper(
        tool_name="registry_start",
        rate_limit_key=lambda provider: f"registry_start:{provider}",
        check_rate_limit=check_rate_limit,
        validate=validate_provider_id_input,
        error_mapper=lambda exc: tool_error_mapper(exc),
        on_error=lambda exc, ctx: tool_error_hook(exc, ctx),
    )
    def registry_start(provider: str) -> dict:
        """
        Explicitly start a provider or all members of a group.

        This is a COMMAND operation - it changes state.

        Args:
            provider: Provider ID or Group ID to start

        Returns:
            Dictionary with provider/group state and tools

        Raises:
            ValueError: If provider ID is unknown or invalid
        """
        ctx = get_context()

        # Check if it's a group first
        # (a group id takes precedence over a provider with the same id; groups
        # are started directly on the group object, not via the command bus)
        if ctx.group_exists(provider):
            group = ctx.get_group(provider)
            started = group.start_all()
            return {
                "group": provider,
                "state": group.state.value,
                "members_started": started,
                "healthy_count": group.healthy_count,
                "total_members": group.total_count,
            }

        # Check provider exists
        if not ctx.provider_exists(provider):
            raise ValueError(f"unknown_provider: {provider}")

        # Send command via CQRS command bus
        command = StartProviderCommand(provider_id=provider)
        return ctx.command_bus.send(command)

    # registry_stop: mirrors registry_start (per-provider rate-limit key,
    # provider-id validation, group-first lookup).
    @mcp.tool(name="registry_stop")
    @mcp_tool_wrapper(
        tool_name="registry_stop",
        rate_limit_key=lambda provider: f"registry_stop:{provider}",
        check_rate_limit=check_rate_limit,
        validate=validate_provider_id_input,
        error_mapper=lambda exc: tool_error_mapper(exc),
        on_error=lambda exc, ctx_dict: tool_error_hook(exc, ctx_dict),
    )
    def registry_stop(provider: str) -> dict:
        """
        Explicitly stop a provider or all members of a group.

        This is a COMMAND operation - it changes state.

        Args:
            provider: Provider ID or Group ID to stop

        Returns:
            Confirmation dictionary

        Raises:
            ValueError: If provider ID is unknown or invalid
        """
        ctx = get_context()

        # Check if it's a group first
        if ctx.group_exists(provider):
            group = ctx.get_group(provider)
            group.stop_all()
            return {
                "group": provider,
                "state": group.state.value,
                "stopped": True,
            }

        # Check provider exists
        if not ctx.provider_exists(provider):
            raise ValueError(f"unknown_provider: {provider}")

        # Send command via CQRS command bus
        command = StopProviderCommand(provider_id=provider)
        return ctx.command_bus.send(command)

    # registry_status: takes no arguments, so there is nothing to validate; the
    # rate-limit callback always consumes the fixed "registry_status" bucket.
    # NOTE(review): the indicator legend in the docstring below does not list a
    # separate degraded entry, while _get_status_indicator maps "degraded" to its
    # own warning symbol - confirm which one is intended.
    @mcp.tool(name="registry_status")
    @mcp_tool_wrapper(
        tool_name="registry_status",
        rate_limit_key=key_global,
        check_rate_limit=lambda key: check_rate_limit("registry_status"),
        validate=None,
        error_mapper=lambda exc: tool_error_mapper(exc),
        on_error=tool_error_hook,
    )
    def registry_status() -> dict:
        """
        Get a comprehensive status overview of the MCP Registry.

        Shows status of all providers with visual indicators:
        - ✅ ready: Provider is running and healthy
        - ⏸️ idle: Provider is cold, will start on first request
        - 🔄 starting: Provider is starting up
        - ❌ error: Provider has errors or is degraded

        Returns:
            Dictionary with providers, groups, health summary, and uptime
        """
        ctx = get_context()

        # Get all providers
        query = ListProvidersQuery(state_filter=None)
        summaries = ctx.query_bus.execute(query)

        # Format providers with status indicators
        providers_status = []
        healthy_count = 0
        total_count = len(summaries)

        for summary in summaries:
            state = summary.state
            indicator = _get_status_indicator(state)

            provider_info = {
                "id": summary.provider_id,
                "indicator": indicator,
                "state": state,
                "mode": summary.mode,
            }

            # Add additional context based on state
            # (only providers in the "ready" state count towards healthy_count)
            if state == "ready":
                healthy_count += 1
                if hasattr(summary, "last_used_ago_s"):
                    provider_info["last_used"] = _format_time_ago(summary.last_used_ago_s)
            elif state == "cold":
                provider_info["note"] = "Will start on first request"
            elif state == "degraded":
                if hasattr(summary, "consecutive_failures"):
                    provider_info["consecutive_failures"] = summary.consecutive_failures

            providers_status.append(provider_info)

        # Get groups
        groups_status = []
        for group_id, group in ctx.groups.items():
            group_info = {
                "id": group_id,
                "indicator": _get_status_indicator(group.state.value),
                "state": group.state.value,
                "healthy_members": group.healthy_count,
                "total_members": group.total_count,
            }
            groups_status.append(group_info)

        # Calculate uptime
        # (seconds elapsed since this module was imported, see _server_start_time)
        uptime_s = time.time() - _server_start_time
        uptime_formatted = _format_uptime(uptime_s)

        # "formatted" carries the same data pre-rendered as a text dashboard.
        return {
            "providers": providers_status,
            "groups": groups_status,
            "summary": {
                "healthy_providers": healthy_count,
                "total_providers": total_count,
                "uptime": uptime_formatted,
                "uptime_seconds": round(uptime_s, 1),
            },
            "formatted": _format_status_dashboard(
                providers_status, groups_status, healthy_count, total_count, uptime_formatted
            ),
        }
245
+
246
+
247
+ def _get_status_indicator(state: str) -> str:
248
+ """Get visual indicator for provider state."""
249
+ indicators = {
250
+ "ready": "✅",
251
+ "cold": "⏸️",
252
+ "starting": "🔄",
253
+ "degraded": "⚠️",
254
+ "dead": "❌",
255
+ "error": "❌",
256
+ }
257
+ return indicators.get(state.lower(), "❓")
258
+
259
+
260
+ def _format_time_ago(seconds: float) -> str:
261
+ """Format seconds as human-readable 'time ago' string."""
262
+ if seconds < 60:
263
+ return f"{int(seconds)}s ago"
264
+ elif seconds < 3600:
265
+ return f"{int(seconds / 60)}m ago"
266
+ else:
267
+ return f"{int(seconds / 3600)}h ago"
268
+
269
+
270
+ def _format_uptime(seconds: float) -> str:
271
+ """Format uptime as human-readable string."""
272
+ hours = int(seconds // 3600)
273
+ minutes = int((seconds % 3600) // 60)
274
+ if hours > 0:
275
+ return f"{hours}h {minutes}m"
276
+ return f"{minutes}m"
277
+
278
+
279
+ def _format_status_dashboard(
280
+ providers: list,
281
+ groups: list,
282
+ healthy: int,
283
+ total: int,
284
+ uptime: str,
285
+ ) -> str:
286
+ """Format status as ASCII dashboard."""
287
+ lines = [
288
+ "╭─────────────────────────────────────────────────╮",
289
+ "│ MCP-Hangar Status │",
290
+ "├─────────────────────────────────────────────────┤",
291
+ ]
292
+
293
+ # Providers
294
+ for p in providers:
295
+ indicator = p["indicator"]
296
+ name = p["id"][:15].ljust(15)
297
+ state = p["state"][:8].ljust(8)
298
+ extra = ""
299
+ if "last_used" in p:
300
+ extra = f"last: {p['last_used']}"
301
+ elif "note" in p:
302
+ extra = p["note"][:20]
303
+ line = f"│ {indicator} {name} {state} {extra[:22].ljust(22)}│"
304
+ lines.append(line)
305
+
306
+ # Groups
307
+ for g in groups:
308
+ indicator = g["indicator"]
309
+ name = g["id"][:15].ljust(15)
310
+ state = g["state"][:8].ljust(8)
311
+ extra = f"{g['healthy_members']}/{g['total_members']} healthy"
312
+ line = f"│ {indicator} {name} {state} {extra[:22].ljust(22)}│"
313
+ lines.append(line)
314
+
315
+ lines.append("├─────────────────────────────────────────────────┤")
316
+ lines.append(f"│ Health: {healthy}/{total} providers healthy".ljust(50) + "│")
317
+ lines.append(f"│ Uptime: {uptime}".ljust(50) + "│")
318
+ lines.append("╰─────────────────────────────────────────────────╯")
319
+
320
+ return "\n".join(lines)
@@ -0,0 +1,113 @@
1
+ """Validation and error handling for MCP tools.
2
+
3
+ This module provides validation functions that use the ApplicationContext
4
+ for accessing rate limiter and security handler, following DIP.
5
+ """
6
+
7
+ from ..application.mcp.tooling import ToolErrorPayload
8
+ from ..domain.exceptions import RateLimitExceeded
9
+ from ..domain.security.input_validator import (
10
+ validate_arguments,
11
+ validate_provider_id,
12
+ validate_timeout,
13
+ validate_tool_name,
14
+ )
15
+ from .context import get_context
16
+
17
+
18
def check_rate_limit(key: str = "global") -> None:
    """Check rate limit and raise exception if exceeded.

    Gets rate limiter from application context (DIP).

    Args:
        key: Rate-limit bucket to consume from.

    Raises:
        RateLimitExceeded: If the rate limiter refuses the request. The
            rejection is also reported to the security handler first.
    """
    ctx = get_context()
    result = ctx.rate_limiter.consume(key)
    if result.allowed:
        return

    # int(1.0 / limit) truncates to 0 for any limit above 1, which would report
    # a meaningless zero-second window; clamp to at least one second. The value
    # is computed once so the log entry and the exception always agree.
    window_seconds = max(1, int(1.0 / result.limit)) if result.limit else 1

    ctx.security_handler.log_rate_limit_exceeded(
        limit=result.limit,
        window_seconds=window_seconds,
    )
    raise RateLimitExceeded(
        limit=result.limit,
        window_seconds=window_seconds,
    )
34
+
35
+
36
def tool_error_mapper(exc: Exception) -> ToolErrorPayload:
    """Convert an exception into the stable error payload returned by MCP tools.

    The payload carries the exception text (or "unknown error" when the text is
    empty), the exception class name, and an empty details dict.
    """
    message = str(exc)
    if not message:
        message = "unknown error"
    kind = type(exc).__name__
    return ToolErrorPayload(error=message, error_type=kind, details={})
43
+
44
+
45
def tool_error_hook(exc: Exception, context: dict) -> None:
    """Report a tool failure to the security handler on a best-effort basis.

    The security handler is obtained from the application context (DIP).
    RuntimeError, AttributeError and TypeError raised while reporting are
    swallowed, so this hook never masks the original tool error.

    Args:
        exc: The exception that occurred.
        context: Additional context dict with provider_id, tool, etc.
    """
    try:
        handler = get_context().security_handler
        description = str(exc) or "unknown error"
        handler.log_validation_failed(
            field="tool",
            message=f"{type(exc).__name__}: {description}",
            provider_id=context.get("provider_id"),
            value=context.get("provider_id"),
        )
    except (RuntimeError, AttributeError, TypeError):
        # Context not initialised or handler unavailable - nothing to report to.
        return
65
+
66
+
67
def validate_provider_id_input(provider: str) -> None:
    """Reject an invalid provider ID.

    On failure the problem is reported to the security handler and then raised.

    Raises:
        ValueError: ``invalid_provider_id: <reason>`` when validation fails.
    """
    outcome = validate_provider_id(provider)
    if outcome.valid:
        return

    handler = get_context().security_handler
    # The first validation error, when present, is used for both the log entry
    # and the exception; the fallbacks differ between the two.
    if outcome.errors:
        log_text = outcome.errors[0].message
        reason = outcome.errors[0].message
    else:
        log_text = "Invalid provider ID"
        reason = "validation failed"

    handler.log_validation_failed(field="provider", message=log_text, provider_id=provider)
    raise ValueError(f"invalid_provider_id: {reason}")
78
+
79
+
80
def validate_tool_name_input(tool: str) -> None:
    """Reject an invalid tool name.

    On failure the problem is reported to the security handler and then raised.

    Raises:
        ValueError: ``invalid_tool_name: <reason>`` when validation fails.
    """
    outcome = validate_tool_name(tool)
    if outcome.valid:
        return

    handler = get_context().security_handler
    # First validation error drives both messages; fallbacks differ.
    if outcome.errors:
        log_text = outcome.errors[0].message
        reason = outcome.errors[0].message
    else:
        log_text = "Invalid tool name"
        reason = "validation failed"

    handler.log_validation_failed(field="tool", message=log_text)
    raise ValueError(f"invalid_tool_name: {reason}")
90
+
91
+
92
def validate_arguments_input(arguments: dict) -> None:
    """Reject invalid tool arguments.

    On failure the problem is reported to the security handler and then raised.

    Raises:
        ValueError: ``invalid_arguments: <reason>`` when validation fails.
    """
    outcome = validate_arguments(arguments)
    if outcome.valid:
        return

    handler = get_context().security_handler
    # First validation error drives both messages; fallbacks differ.
    if outcome.errors:
        log_text = outcome.errors[0].message
        reason = outcome.errors[0].message
    else:
        log_text = "Invalid arguments"
        reason = "validation failed"

    handler.log_validation_failed(field="arguments", message=log_text)
    raise ValueError(f"invalid_arguments: {reason}")
102
+
103
+
104
def validate_timeout_input(timeout: float) -> None:
    """Reject an invalid timeout value.

    On failure the problem is reported to the security handler and then raised.

    Raises:
        ValueError: ``invalid_timeout: <reason>`` when validation fails.
    """
    outcome = validate_timeout(timeout)
    if outcome.valid:
        return

    handler = get_context().security_handler
    # First validation error drives both messages; fallbacks differ.
    if outcome.errors:
        log_text = outcome.errors[0].message
        reason = outcome.errors[0].message
    else:
        log_text = "Invalid timeout"
        reason = "validation failed"

    handler.log_validation_failed(field="timeout", message=log_text)
    raise ValueError(f"invalid_timeout: {reason}")
@@ -0,0 +1,229 @@
1
+ """Thread-safe stdio client with proper message correlation."""
2
+
3
+ from dataclasses import dataclass
4
+ import json
5
+ from queue import Empty, Queue
6
+ import subprocess
7
+ import threading
8
+ import time
9
+ from typing import Any, Dict
10
+ import uuid
11
+
12
+ from .domain.exceptions import ClientError
13
+ from .logging_config import get_logger
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
@dataclass
class PendingRequest:
    """Tracks a pending RPC request waiting for a response."""

    # Correlation id; StdioClient.call() generates a uuid4 string and sends it
    # as the JSON-RPC "id" of the request.
    request_id: str
    # Queue on which the response (or a synthetic error dict) is delivered to
    # the caller blocked in StdioClient.call().
    result_queue: Queue
    # time.time() timestamp taken when the request was created.
    started_at: float
25
+
26
+
27
class StdioClient:
    """
    Thread-safe JSON-RPC client over stdio.
    Handles message correlation, timeouts, and process lifecycle.

    Requests are written to the child's stdin as one JSON document per line.
    A daemon reader thread parses stdout line by line and hands each response
    to the caller waiting on the matching request id.
    """

    def __init__(self, popen: subprocess.Popen):
        """
        Initialize client with a running subprocess.

        The reader thread is started immediately.

        Args:
            popen: subprocess.Popen instance with stdin/stdout pipes
        """
        self.process = popen
        # request id -> PendingRequest; every access is guarded by pending_lock.
        self.pending: Dict[str, PendingRequest] = {}
        self.pending_lock = threading.Lock()
        self.reader_thread = threading.Thread(target=self._reader_loop, daemon=True)
        # Must be set before the thread starts: the reader loop polls this flag.
        self.closed = False
        self.reader_thread.start()

    def _reader_loop(self):
        """
        Read stdout and dispatch responses to waiting callers.
        Runs in a dedicated daemon thread.

        The loop ends on EOF, on an unexpected exception, or once ``closed`` is
        set; on exit every still-pending request is failed with "reader_died".
        """
        logger.info("stdio_client_reader_started", pid=self.process.pid)
        while not self.closed:
            try:
                line = self.process.stdout.readline()
                if not line:
                    # EOF reached, process died
                    logger.warning("stdio_client_eof_on_stdout")
                    self._capture_process_stderr()
                    break

                line = line.strip()
                if not line:
                    continue

                try:
                    msg = json.loads(line)
                except json.JSONDecodeError as e:
                    logger.error("stdio_client_malformed_json", preview=line[:100], error=str(e))
                    continue

                if not isinstance(msg, dict):
                    # Valid JSON but not an object (e.g. a bare number, string or
                    # array printed by the child). Skip it instead of letting
                    # msg.get() raise and take the whole reader thread down.
                    logger.error("stdio_client_non_object_message", preview=line[:100])
                    continue

                msg_id = msg.get("id")

                if msg_id:
                    # This is a response to a request
                    with self.pending_lock:
                        pending = self.pending.pop(msg_id, None)

                    if pending:
                        pending.result_queue.put(msg)
                    else:
                        # Typically a late reply to a request that already timed out.
                        logger.warning("stdio_client_unknown_request", request_id=msg_id)
                else:
                    # Unsolicited notification - log and ignore
                    logger.debug("stdio_client_notification", message=msg)

            except Exception as e:
                logger.error("stdio_client_reader_error", error=str(e))
                break

        # Clean up on exit
        self._cleanup_pending("reader_died")

    def _capture_process_stderr(self) -> None:
        """Capture and log stderr from the process for debugging.

        Best effort: any failure while inspecting the process is logged at
        debug level and otherwise ignored.
        """
        try:
            # Log exit code
            rc = self.process.poll()
            if rc is not None:
                logger.error("stdio_client_process_exited", exit_code=rc)

            # Try to read stderr if available
            stderr = getattr(self.process, "stderr", None)
            if stderr:
                try:
                    # Read available stderr (non-blocking would be ideal, but read() works post-exit)
                    err_bytes = stderr.read()
                    if err_bytes:
                        # The pipe may have been opened in text or in binary mode.
                        err_text = (
                            err_bytes if isinstance(err_bytes, str) else err_bytes.decode(errors="replace")
                        ).strip()
                        if err_text:
                            # Log first 2000 chars to avoid log spam
                            if len(err_text) > 2000:
                                err_text = err_text[:2000] + "... (truncated)"
                            logger.error("stdio_client_process_stderr", stderr=err_text)
                except Exception as read_err:
                    logger.debug("stdio_client_stderr_read_failed", error=str(read_err))
        except Exception as e:
            logger.debug("stdio_client_capture_error", error=str(e))

    def _cleanup_pending(self, error_msg: str):
        """Clean up all pending requests on shutdown or error.

        Every waiting caller receives ``{"error": {"code": -1, "message": error_msg}}``
        and the pending table is emptied.
        """
        with self.pending_lock:
            for pending in self.pending.values():
                pending.result_queue.put({"error": {"code": -1, "message": error_msg}})
            self.pending.clear()

    def call(self, method: str, params: Dict[str, Any], timeout: float = 15.0) -> Dict[str, Any]:
        """
        Synchronous RPC call with explicit timeout.

        Args:
            method: JSON-RPC method name
            params: Method parameters
            timeout: Timeout in seconds

        Returns:
            Response dictionary with either 'result' or 'error' key

        Raises:
            ClientError: If the client is closed or write fails
            TimeoutError: If the request times out
        """
        if self.closed:
            raise ClientError("client_closed")

        request_id = str(uuid.uuid4())
        result_queue = Queue(maxsize=1)

        pending = PendingRequest(request_id=request_id, result_queue=result_queue, started_at=time.time())

        # Register before writing so a fast reply cannot arrive unmatched.
        with self.pending_lock:
            self.pending[request_id] = pending

        request = {
            "jsonrpc": "2.0",
            "id": request_id,
            "method": method,
            "params": params,
        }

        try:
            request_str = json.dumps(request) + "\n"
            logger.info(
                "stdio_client_sending_request",
                method=method,
                pid=self.process.pid,
                alive=self.process.poll() is None,
            )
            self.process.stdin.write(request_str)
            self.process.stdin.flush()
            logger.debug("stdio_client_request_sent")
        except Exception as e:
            logger.error("stdio_client_write_failed", error=str(e))
            with self.pending_lock:
                self.pending.pop(request_id, None)
            # Chain the cause so the original I/O error stays visible in tracebacks.
            raise ClientError(f"write_failed: {e}") from e

        try:
            response = result_queue.get(timeout=timeout)
            return response
        except Empty:
            # Drop the registration so a late reply is reported as unknown
            # instead of being queued for nobody.
            with self.pending_lock:
                self.pending.pop(request_id, None)
            raise TimeoutError(f"timeout: {method} after {timeout}s")

    def is_alive(self) -> bool:
        """Check if the underlying process is still running."""
        return self.process.poll() is None

    def close(self):
        """
        Graceful shutdown: attempt RPC shutdown, then terminate process.
        Safe to call multiple times.
        """
        if self.closed:
            return

        # Try graceful shutdown via RPC.
        # This must happen BEFORE `closed` is set: call() rejects every request
        # once the client is marked closed, so flipping the flag first would
        # mean the shutdown request is never sent. It is skipped when the
        # process has already exited.
        try:
            if self.process.poll() is None:
                self.call("shutdown", {}, timeout=3.0)
        except Exception as e:
            logger.debug("stdio_client_shutdown_rpc_failed", error=str(e))
        finally:
            self.closed = True

        # Terminate process
        try:
            if self.process.poll() is None:
                self.process.terminate()
                try:
                    self.process.wait(timeout=5.0)
                except subprocess.TimeoutExpired:
                    # Did not exit after terminate(); force-kill and reap it.
                    logger.warning("stdio_client_process_terminate_timeout")
                    self.process.kill()
                    self.process.wait()
        except Exception as e:
            logger.error("stdio_client_cleanup_error", error=str(e))

        # Clean up any remaining pending requests
        self._cleanup_pending("client_closed")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always close; returning False lets any in-flight exception propagate.
        self.close()
        return False