mcp-hangar 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. mcp_hangar/__init__.py +139 -0
  2. mcp_hangar/application/__init__.py +1 -0
  3. mcp_hangar/application/commands/__init__.py +67 -0
  4. mcp_hangar/application/commands/auth_commands.py +118 -0
  5. mcp_hangar/application/commands/auth_handlers.py +296 -0
  6. mcp_hangar/application/commands/commands.py +59 -0
  7. mcp_hangar/application/commands/handlers.py +189 -0
  8. mcp_hangar/application/discovery/__init__.py +21 -0
  9. mcp_hangar/application/discovery/discovery_metrics.py +283 -0
  10. mcp_hangar/application/discovery/discovery_orchestrator.py +497 -0
  11. mcp_hangar/application/discovery/lifecycle_manager.py +315 -0
  12. mcp_hangar/application/discovery/security_validator.py +414 -0
  13. mcp_hangar/application/event_handlers/__init__.py +50 -0
  14. mcp_hangar/application/event_handlers/alert_handler.py +191 -0
  15. mcp_hangar/application/event_handlers/audit_handler.py +203 -0
  16. mcp_hangar/application/event_handlers/knowledge_base_handler.py +120 -0
  17. mcp_hangar/application/event_handlers/logging_handler.py +69 -0
  18. mcp_hangar/application/event_handlers/metrics_handler.py +152 -0
  19. mcp_hangar/application/event_handlers/persistent_audit_store.py +217 -0
  20. mcp_hangar/application/event_handlers/security_handler.py +604 -0
  21. mcp_hangar/application/mcp/tooling.py +158 -0
  22. mcp_hangar/application/ports/__init__.py +9 -0
  23. mcp_hangar/application/ports/observability.py +237 -0
  24. mcp_hangar/application/queries/__init__.py +52 -0
  25. mcp_hangar/application/queries/auth_handlers.py +237 -0
  26. mcp_hangar/application/queries/auth_queries.py +118 -0
  27. mcp_hangar/application/queries/handlers.py +227 -0
  28. mcp_hangar/application/read_models/__init__.py +11 -0
  29. mcp_hangar/application/read_models/provider_views.py +139 -0
  30. mcp_hangar/application/sagas/__init__.py +11 -0
  31. mcp_hangar/application/sagas/group_rebalance_saga.py +137 -0
  32. mcp_hangar/application/sagas/provider_failover_saga.py +266 -0
  33. mcp_hangar/application/sagas/provider_recovery_saga.py +172 -0
  34. mcp_hangar/application/services/__init__.py +9 -0
  35. mcp_hangar/application/services/provider_service.py +208 -0
  36. mcp_hangar/application/services/traced_provider_service.py +211 -0
  37. mcp_hangar/bootstrap/runtime.py +328 -0
  38. mcp_hangar/context.py +178 -0
  39. mcp_hangar/domain/__init__.py +117 -0
  40. mcp_hangar/domain/contracts/__init__.py +57 -0
  41. mcp_hangar/domain/contracts/authentication.py +225 -0
  42. mcp_hangar/domain/contracts/authorization.py +229 -0
  43. mcp_hangar/domain/contracts/event_store.py +178 -0
  44. mcp_hangar/domain/contracts/metrics_publisher.py +59 -0
  45. mcp_hangar/domain/contracts/persistence.py +383 -0
  46. mcp_hangar/domain/contracts/provider_runtime.py +146 -0
  47. mcp_hangar/domain/discovery/__init__.py +20 -0
  48. mcp_hangar/domain/discovery/conflict_resolver.py +267 -0
  49. mcp_hangar/domain/discovery/discovered_provider.py +185 -0
  50. mcp_hangar/domain/discovery/discovery_service.py +412 -0
  51. mcp_hangar/domain/discovery/discovery_source.py +192 -0
  52. mcp_hangar/domain/events.py +433 -0
  53. mcp_hangar/domain/exceptions.py +525 -0
  54. mcp_hangar/domain/model/__init__.py +70 -0
  55. mcp_hangar/domain/model/aggregate.py +58 -0
  56. mcp_hangar/domain/model/circuit_breaker.py +152 -0
  57. mcp_hangar/domain/model/event_sourced_api_key.py +413 -0
  58. mcp_hangar/domain/model/event_sourced_provider.py +423 -0
  59. mcp_hangar/domain/model/event_sourced_role_assignment.py +268 -0
  60. mcp_hangar/domain/model/health_tracker.py +183 -0
  61. mcp_hangar/domain/model/load_balancer.py +185 -0
  62. mcp_hangar/domain/model/provider.py +810 -0
  63. mcp_hangar/domain/model/provider_group.py +656 -0
  64. mcp_hangar/domain/model/tool_catalog.py +105 -0
  65. mcp_hangar/domain/policies/__init__.py +19 -0
  66. mcp_hangar/domain/policies/provider_health.py +187 -0
  67. mcp_hangar/domain/repository.py +249 -0
  68. mcp_hangar/domain/security/__init__.py +85 -0
  69. mcp_hangar/domain/security/input_validator.py +710 -0
  70. mcp_hangar/domain/security/rate_limiter.py +387 -0
  71. mcp_hangar/domain/security/roles.py +237 -0
  72. mcp_hangar/domain/security/sanitizer.py +387 -0
  73. mcp_hangar/domain/security/secrets.py +501 -0
  74. mcp_hangar/domain/services/__init__.py +20 -0
  75. mcp_hangar/domain/services/audit_service.py +376 -0
  76. mcp_hangar/domain/services/image_builder.py +328 -0
  77. mcp_hangar/domain/services/provider_launcher.py +1046 -0
  78. mcp_hangar/domain/value_objects.py +1138 -0
  79. mcp_hangar/errors.py +818 -0
  80. mcp_hangar/fastmcp_server.py +1105 -0
  81. mcp_hangar/gc.py +134 -0
  82. mcp_hangar/infrastructure/__init__.py +79 -0
  83. mcp_hangar/infrastructure/async_executor.py +133 -0
  84. mcp_hangar/infrastructure/auth/__init__.py +37 -0
  85. mcp_hangar/infrastructure/auth/api_key_authenticator.py +388 -0
  86. mcp_hangar/infrastructure/auth/event_sourced_store.py +567 -0
  87. mcp_hangar/infrastructure/auth/jwt_authenticator.py +360 -0
  88. mcp_hangar/infrastructure/auth/middleware.py +340 -0
  89. mcp_hangar/infrastructure/auth/opa_authorizer.py +243 -0
  90. mcp_hangar/infrastructure/auth/postgres_store.py +659 -0
  91. mcp_hangar/infrastructure/auth/projections.py +366 -0
  92. mcp_hangar/infrastructure/auth/rate_limiter.py +311 -0
  93. mcp_hangar/infrastructure/auth/rbac_authorizer.py +323 -0
  94. mcp_hangar/infrastructure/auth/sqlite_store.py +624 -0
  95. mcp_hangar/infrastructure/command_bus.py +112 -0
  96. mcp_hangar/infrastructure/discovery/__init__.py +110 -0
  97. mcp_hangar/infrastructure/discovery/docker_source.py +289 -0
  98. mcp_hangar/infrastructure/discovery/entrypoint_source.py +249 -0
  99. mcp_hangar/infrastructure/discovery/filesystem_source.py +383 -0
  100. mcp_hangar/infrastructure/discovery/kubernetes_source.py +247 -0
  101. mcp_hangar/infrastructure/event_bus.py +260 -0
  102. mcp_hangar/infrastructure/event_sourced_repository.py +443 -0
  103. mcp_hangar/infrastructure/event_store.py +396 -0
  104. mcp_hangar/infrastructure/knowledge_base/__init__.py +259 -0
  105. mcp_hangar/infrastructure/knowledge_base/contracts.py +202 -0
  106. mcp_hangar/infrastructure/knowledge_base/memory.py +177 -0
  107. mcp_hangar/infrastructure/knowledge_base/postgres.py +545 -0
  108. mcp_hangar/infrastructure/knowledge_base/sqlite.py +513 -0
  109. mcp_hangar/infrastructure/metrics_publisher.py +36 -0
  110. mcp_hangar/infrastructure/observability/__init__.py +10 -0
  111. mcp_hangar/infrastructure/observability/langfuse_adapter.py +534 -0
  112. mcp_hangar/infrastructure/persistence/__init__.py +33 -0
  113. mcp_hangar/infrastructure/persistence/audit_repository.py +371 -0
  114. mcp_hangar/infrastructure/persistence/config_repository.py +398 -0
  115. mcp_hangar/infrastructure/persistence/database.py +333 -0
  116. mcp_hangar/infrastructure/persistence/database_common.py +330 -0
  117. mcp_hangar/infrastructure/persistence/event_serializer.py +280 -0
  118. mcp_hangar/infrastructure/persistence/event_upcaster.py +166 -0
  119. mcp_hangar/infrastructure/persistence/in_memory_event_store.py +150 -0
  120. mcp_hangar/infrastructure/persistence/recovery_service.py +312 -0
  121. mcp_hangar/infrastructure/persistence/sqlite_event_store.py +386 -0
  122. mcp_hangar/infrastructure/persistence/unit_of_work.py +409 -0
  123. mcp_hangar/infrastructure/persistence/upcasters/README.md +13 -0
  124. mcp_hangar/infrastructure/persistence/upcasters/__init__.py +7 -0
  125. mcp_hangar/infrastructure/query_bus.py +153 -0
  126. mcp_hangar/infrastructure/saga_manager.py +401 -0
  127. mcp_hangar/logging_config.py +209 -0
  128. mcp_hangar/metrics.py +1007 -0
  129. mcp_hangar/models.py +31 -0
  130. mcp_hangar/observability/__init__.py +54 -0
  131. mcp_hangar/observability/health.py +487 -0
  132. mcp_hangar/observability/metrics.py +319 -0
  133. mcp_hangar/observability/tracing.py +433 -0
  134. mcp_hangar/progress.py +542 -0
  135. mcp_hangar/retry.py +613 -0
  136. mcp_hangar/server/__init__.py +120 -0
  137. mcp_hangar/server/__main__.py +6 -0
  138. mcp_hangar/server/auth_bootstrap.py +340 -0
  139. mcp_hangar/server/auth_cli.py +335 -0
  140. mcp_hangar/server/auth_config.py +305 -0
  141. mcp_hangar/server/bootstrap.py +735 -0
  142. mcp_hangar/server/cli.py +161 -0
  143. mcp_hangar/server/config.py +224 -0
  144. mcp_hangar/server/context.py +215 -0
  145. mcp_hangar/server/http_auth_middleware.py +165 -0
  146. mcp_hangar/server/lifecycle.py +467 -0
  147. mcp_hangar/server/state.py +117 -0
  148. mcp_hangar/server/tools/__init__.py +16 -0
  149. mcp_hangar/server/tools/discovery.py +186 -0
  150. mcp_hangar/server/tools/groups.py +75 -0
  151. mcp_hangar/server/tools/health.py +301 -0
  152. mcp_hangar/server/tools/provider.py +939 -0
  153. mcp_hangar/server/tools/registry.py +320 -0
  154. mcp_hangar/server/validation.py +113 -0
  155. mcp_hangar/stdio_client.py +229 -0
  156. mcp_hangar-0.2.0.dist-info/METADATA +347 -0
  157. mcp_hangar-0.2.0.dist-info/RECORD +160 -0
  158. mcp_hangar-0.2.0.dist-info/WHEEL +4 -0
  159. mcp_hangar-0.2.0.dist-info/entry_points.txt +2 -0
  160. mcp_hangar-0.2.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,939 @@
1
+ """Provider interaction tools: tools, invoke, details.
2
+
3
+ Uses ApplicationContext for dependency injection (DIP).
4
+ Separates commands (write) from queries (read) following CQRS.
5
+ """
6
+
7
+ import asyncio
8
+ import time
9
+ from typing import Any, Dict, Optional
10
+ import uuid
11
+
12
+ from mcp.server.fastmcp import Context, FastMCP
13
+
14
+ from ...application.commands import InvokeToolCommand, StartProviderCommand
15
+ from ...application.mcp.tooling import chain_validators, key_registry_invoke, mcp_tool_wrapper
16
+ from ...domain.model import ProviderGroup
17
+ from ...errors import map_exception_to_hangar_error, ProviderNotFoundError as HangarProviderNotFoundError
18
+ from ...infrastructure.async_executor import submit_async
19
+ from ...infrastructure.query_bus import GetProviderQuery, GetProviderToolsQuery
20
+ from ...progress import create_progress_tracker, get_stage_message, ProgressCallback, ProgressStage, ProgressTracker
21
+ from ...retry import get_retry_policy, retry_sync, RetryPolicy
22
+ from ..context import get_context
23
+ from ..validation import (
24
+ check_rate_limit,
25
+ tool_error_hook,
26
+ tool_error_mapper,
27
+ validate_arguments_input,
28
+ validate_provider_id_input,
29
+ validate_timeout_input,
30
+ validate_tool_name_input,
31
+ )
32
+
33
# =============================================================================
# Constants
# =============================================================================

# Used by _invoke_with_retry as the default failover budget: each attempt is
# routed to a (preferably different) healthy group member.
DEFAULT_GROUP_RETRY_ATTEMPTS = 2
"""Number of retry attempts when invoking tool on group members."""

# Applied when callers of registry_invoke / registry_invoke_ex omit an
# explicit per-call timeout.
DEFAULT_TIMEOUT_SECONDS = 30.0
"""Default timeout for tool invocation."""
42
+
43
+ # =============================================================================
44
+ # Helper Functions
45
+ # =============================================================================
46
+
47
+
48
+ def _extract_error_text(content: Any) -> str:
49
+ """Extract error message text from MCP content array.
50
+
51
+ MCP content can be:
52
+ - A list of dicts with type/text: [{"type": "text", "text": "Error: ..."}]
53
+ - A string
54
+ - A dict with text field
55
+
56
+ Args:
57
+ content: MCP content field (can be list, dict, or string)
58
+
59
+ Returns:
60
+ Extracted error text string
61
+ """
62
+ if isinstance(content, str):
63
+ return content
64
+
65
+ if isinstance(content, list):
66
+ # Extract text from content items
67
+ texts = []
68
+ for item in content:
69
+ if isinstance(item, dict):
70
+ text = item.get("text", "")
71
+ if text:
72
+ texts.append(text)
73
+ elif isinstance(item, str):
74
+ texts.append(item)
75
+ return " ".join(texts) if texts else "Unknown error"
76
+
77
+ if isinstance(content, dict):
78
+ return content.get("text", content.get("message", str(content)))
79
+
80
+ return str(content) if content else "Unknown error"
81
+
82
+
83
def _submit_audit_log(
    provider: str,
    tool: str,
    arguments: Dict[str, Any],
    elapsed_ms: float,
    success: bool,
    result_summary: Optional[str] = None,
    error_message: Optional[str] = None,
    correlation_id: Optional[str] = None,
) -> None:
    """Record a tool invocation in the knowledge base, fire-and-forget.

    The coroutine is handed to the shared async executor so the calling
    thread never blocks, and a failing audit write never fails the
    invocation itself. When the knowledge base is unavailable this is a
    silent no-op.

    Args:
        provider: Provider ID.
        tool: Tool name.
        arguments: Tool arguments.
        elapsed_ms: Operation duration in milliseconds.
        success: Whether the operation succeeded.
        result_summary: Optional result summary (caller-truncated).
        error_message: Optional error message (caller-truncated).
        correlation_id: Optional correlation ID for tracing.
    """
    # Imported lazily so the knowledge-base backend is only touched when
    # an audit entry is actually submitted.
    from ...infrastructure.knowledge_base import audit_log, is_available

    if not is_available():
        return

    async def _record() -> None:
        await audit_log(
            event_type="tool_invocation",
            provider=provider,
            tool=tool,
            arguments=arguments,
            result_summary=result_summary,
            duration_ms=int(elapsed_ms),
            success=success,
            error_message=error_message,
            correlation_id=correlation_id,
        )

    submit_async(_record())
127
+
128
+
129
def _get_tools_for_group(provider: str) -> Dict[str, Any]:
    """Resolve tool schemas for a group by delegating to one healthy member.

    Selects a member via the group's load balancer, ensures it is started,
    and queries its tool catalog.

    Raises:
        ValueError: If the group has no healthy members.
    """
    ctx = get_context()
    member = ctx.get_group(provider).select_member()

    if not member:
        raise ValueError(f"no_healthy_members_in_group: {provider}")

    member_id = member.provider_id
    ctx.command_bus.send(StartProviderCommand(provider_id=member_id))
    tools = ctx.query_bus.execute(GetProviderToolsQuery(provider_id=member_id))

    return {
        "provider": provider,
        "group": True,
        "tools": [t.to_dict() for t in tools],
    }
147
+
148
+
149
def _get_tools_for_provider(provider: str) -> Dict[str, Any]:
    """Return tool schemas for a single provider.

    Providers with a predefined tool catalog are answered without being
    started; otherwise the provider is launched and its tools discovered
    through the query bus.
    """
    ctx = get_context()
    provider_obj = ctx.get_provider(provider)

    if provider_obj.has_tools:
        # Catalog is already known — skip the (potentially slow) start.
        return {
            "provider": provider,
            "state": provider_obj.state.value,
            "predefined": provider_obj.tools_predefined,
            "tools": [t.to_dict() for t in provider_obj.tools.list_tools()],
        }

    # Cold path: start the provider, then discover its tools.
    ctx.command_bus.send(StartProviderCommand(provider_id=provider))
    discovered = ctx.query_bus.execute(GetProviderToolsQuery(provider_id=provider))

    return {
        "provider": provider,
        "state": provider_obj.state.value,
        "predefined": False,
        "tools": [t.to_dict() for t in discovered],
    }
175
+
176
+
177
def _invoke_on_provider(
    provider: str,
    tool: str,
    arguments: Dict,
    timeout: float,
    progress: Optional[ProgressTracker] = None,
) -> Dict[str, Any]:
    """Dispatch a tool invocation to a single provider via the command bus.

    When a progress tracker is supplied, cold providers report LAUNCHING
    then READY before execution; warm providers skip those stages (READY
    was already reported by the caller). EXECUTING and PROCESSING are
    reported around the actual invocation.
    """
    ctx = get_context()

    provider_obj = ctx.get_provider(provider)
    needs_launch = bool(provider_obj and provider_obj.state.value == "cold")

    if progress:
        if needs_launch:
            progress.report(ProgressStage.LAUNCHING, f"Starting {provider_obj.mode} provider...")
            progress.report(ProgressStage.READY, "Provider ready")
        progress.report(ProgressStage.EXECUTING, get_stage_message(ProgressStage.EXECUTING, tool=tool))

    result = ctx.command_bus.send(
        InvokeToolCommand(
            provider_id=provider,
            tool_name=tool,
            arguments=arguments,
            timeout=timeout,
        )
    )

    if progress:
        progress.report(ProgressStage.PROCESSING, "Processing response...")

    return result
217
+
218
+
219
def _invoke_on_group(
    group_id: str,
    tool: str,
    arguments: Dict,
    timeout: float,
    progress: Optional[ProgressTracker] = None,
) -> Dict[str, Any]:
    """Invoke a tool on a provider group with load balancing and failover.

    Raises:
        ValueError: If the group is unavailable or has no healthy members.
    """
    ctx = get_context()
    group = ctx.get_group(group_id)

    if not group.is_available:
        raise ValueError(f"group_not_available: {group_id} (state={group.state.value})")

    # Probe the load balancer up front so an empty group fails fast.
    if not group.select_member():
        raise ValueError(f"no_healthy_members_in_group: {group_id}")

    return _invoke_with_retry(group, tool, arguments, timeout, progress=progress)
238
+
239
+
240
def _invoke_with_retry(
    group: ProviderGroup,
    tool: str,
    arguments: Dict,
    timeout: float,
    max_attempts: int = DEFAULT_GROUP_RETRY_ATTEMPTS,
    progress: Optional[ProgressTracker] = None,
) -> Dict[str, Any]:
    """Try the tool on successive group members, failing over on errors.

    Each attempt asks the group's load balancer for a member; the loop
    stops early when the balancer returns nothing or cycles back to a
    member already tried. Success/failure is reported back to the group
    so its health tracking stays current.

    Raises:
        The first member's exception when all attempts fail, or
        ValueError when no member could even be selected.
    """
    initial_error: Optional[Exception] = None
    attempted: set = set()

    for attempt_index in range(max_attempts):
        member = group.select_member()
        if not member or member.provider_id in attempted:
            # No candidates left, or the balancer is repeating itself.
            break

        member_id = member.provider_id
        attempted.add(member_id)

        try:
            outcome = _invoke_on_provider(member_id, tool, arguments, timeout, progress)
            # Kept inside the try: a failure while reporting success is
            # treated like any other member failure, as before.
            group.report_success(member_id)
            return outcome
        except Exception as exc:
            group.report_failure(member_id)
            if initial_error is None:
                initial_error = exc

            if progress and attempt_index + 1 < max_attempts:
                progress.report(
                    ProgressStage.RETRYING,
                    get_stage_message(
                        ProgressStage.RETRYING,
                        attempt=attempt_index + 2,
                        max_attempts=max_attempts,
                    ),
                )

    raise initial_error or ValueError("no_healthy_members_in_group")
278
+
279
+
280
+ # =============================================================================
281
+ # Tool Registration
282
+ # =============================================================================
283
+
284
+
285
+ def register_provider_tools(mcp: FastMCP) -> None:
286
+ """Register provider interaction tools with MCP server."""
287
+
288
+ @mcp.tool(name="registry_tools")
289
+ @mcp_tool_wrapper(
290
+ tool_name="registry_tools",
291
+ rate_limit_key=lambda provider: f"registry_tools:{provider}",
292
+ check_rate_limit=check_rate_limit,
293
+ validate=validate_provider_id_input,
294
+ error_mapper=tool_error_mapper,
295
+ on_error=lambda exc, ctx: tool_error_hook(exc, ctx),
296
+ )
297
+ def registry_tools(provider: str) -> dict:
298
+ """
299
+ Get detailed tool schemas for a provider.
300
+
301
+ This is a QUERY operation with potential side-effect (starting provider).
302
+
303
+ Args:
304
+ provider: Provider ID
305
+
306
+ Returns:
307
+ Dictionary with provider ID and list of tool schemas
308
+
309
+ Raises:
310
+ ValueError: If provider ID is unknown or invalid
311
+ """
312
+ ctx = get_context()
313
+
314
+ if ctx.group_exists(provider):
315
+ return _get_tools_for_group(provider)
316
+
317
+ if not ctx.provider_exists(provider):
318
+ raise ValueError(f"unknown_provider: {provider}")
319
+
320
+ return _get_tools_for_provider(provider)
321
+
322
+ @mcp.tool(name="registry_invoke")
323
+ @mcp_tool_wrapper(
324
+ tool_name="registry_invoke",
325
+ rate_limit_key=key_registry_invoke,
326
+ check_rate_limit=check_rate_limit,
327
+ validate=chain_validators(
328
+ lambda provider, tool, arguments=None, timeout=DEFAULT_TIMEOUT_SECONDS: validate_provider_id_input(
329
+ provider
330
+ ),
331
+ lambda provider, tool, arguments=None, timeout=DEFAULT_TIMEOUT_SECONDS: validate_tool_name_input(tool),
332
+ lambda provider, tool, arguments=None, timeout=DEFAULT_TIMEOUT_SECONDS: validate_arguments_input(
333
+ arguments or {}
334
+ ),
335
+ lambda provider, tool, arguments=None, timeout=DEFAULT_TIMEOUT_SECONDS: validate_timeout_input(timeout),
336
+ ),
337
+ error_mapper=tool_error_mapper,
338
+ on_error=lambda exc, ctx: tool_error_hook(exc, ctx),
339
+ )
340
+ def registry_invoke(
341
+ provider: str,
342
+ tool: str,
343
+ arguments: Optional[dict] = None,
344
+ timeout: float = DEFAULT_TIMEOUT_SECONDS,
345
+ ) -> dict:
346
+ """
347
+ Invoke a tool on a provider or provider group.
348
+
349
+ This is a COMMAND operation - it may have side effects.
350
+
351
+ Args:
352
+ provider: Provider ID or Group ID
353
+ tool: Tool name
354
+ arguments: Tool arguments (default: empty dict)
355
+ timeout: Timeout in seconds (default: 30.0)
356
+
357
+ Returns:
358
+ Tool result
359
+
360
+ Raises:
361
+ ValueError: If provider ID is unknown or inputs are invalid
362
+ """
363
+ ctx = get_context()
364
+ args = arguments or {}
365
+
366
+ if ctx.group_exists(provider):
367
+ return _invoke_on_group(provider, tool, args, timeout)
368
+
369
+ if not ctx.provider_exists(provider):
370
+ raise ValueError(f"unknown_provider: {provider}")
371
+
372
+ return _invoke_on_provider(provider, tool, args, timeout)
373
+
374
+ # =========================================================================
375
+ # Enhanced invoke with retry and progress support
376
+ # =========================================================================
377
+
378
+ def _invoke_with_full_retry(
379
+ provider: str,
380
+ tool: str,
381
+ arguments: Dict[str, Any],
382
+ timeout: float,
383
+ retry_policy: Optional[RetryPolicy] = None,
384
+ progress_callback: Optional[ProgressCallback] = None,
385
+ include_progress: bool = True,
386
+ correlation_id: Optional[str] = None,
387
+ ) -> Dict[str, Any]:
388
+ """Internal implementation of invoke with retry and progress.
389
+
390
+ Progress is always tracked and logged. If include_progress=True,
391
+ progress events are included in the response under _progress key.
392
+ """
393
+ from ...errors import ErrorClassifier
394
+ from ...logging_config import get_logger
395
+
396
+ logger = get_logger(__name__)
397
+
398
+ ctx = get_context()
399
+ args = arguments or {}
400
+ correlation_id = correlation_id or str(uuid.uuid4())
401
+
402
+ # Get retry policy (provider-specific or default)
403
+ policy = retry_policy or get_retry_policy(provider)
404
+
405
+ # Always create progress tracker for logging
406
+ # User callback is optional (for programmatic use)
407
+ progress_events = []
408
+
409
+ def log_progress(stage: str, message: str, elapsed_ms: float):
410
+ """Log progress and collect events."""
411
+ event = {
412
+ "stage": stage,
413
+ "message": message,
414
+ "elapsed_ms": round(elapsed_ms, 2),
415
+ }
416
+ progress_events.append(event)
417
+ logger.info(
418
+ "operation_progress",
419
+ provider=provider,
420
+ tool=tool,
421
+ correlation_id=correlation_id,
422
+ **event,
423
+ )
424
+ # Also call user callback if provided
425
+ if progress_callback:
426
+ try:
427
+ progress_callback(stage, message, elapsed_ms)
428
+ except Exception:
429
+ pass
430
+
431
+ progress = create_progress_tracker(
432
+ provider=provider,
433
+ operation=tool,
434
+ callback=log_progress,
435
+ correlation_id=correlation_id,
436
+ )
437
+
438
+ # Report initial state
439
+ provider_obj = ctx.get_provider(provider) if ctx.provider_exists(provider) else None
440
+ if provider_obj and provider_obj.state.value == "cold":
441
+ progress.report(ProgressStage.COLD_START, "Provider is cold, launching...")
442
+ else:
443
+ progress.report(ProgressStage.READY, "Starting operation...")
444
+
445
+ def do_invoke():
446
+ if ctx.group_exists(provider):
447
+ return _invoke_on_group(provider, tool, args, timeout, progress)
448
+
449
+ if not ctx.provider_exists(provider):
450
+ available = list(ctx.repository.get_all().keys())
451
+ raise HangarProviderNotFoundError(
452
+ message=f"Provider '{provider}' not found",
453
+ provider=provider,
454
+ operation="invoke",
455
+ available_providers=available,
456
+ )
457
+
458
+ return _invoke_on_provider(provider, tool, args, timeout, progress)
459
+
460
+ # Execute with retry
461
+ start_time = time.time()
462
+ result = retry_sync(
463
+ operation=do_invoke,
464
+ policy=policy,
465
+ provider=provider,
466
+ operation_name=tool,
467
+ on_retry=lambda attempt, err, delay: progress.report(
468
+ ProgressStage.RETRYING,
469
+ f"Retry {attempt}/{policy.max_attempts} in {delay:.1f}s: {str(err)[:50]}",
470
+ ),
471
+ )
472
+
473
+ elapsed_ms = (time.time() - start_time) * 1000
474
+
475
+ # Submit audit log (fire-and-forget, won't block or fail invocation)
476
+ _submit_audit_log(
477
+ provider=provider,
478
+ tool=tool,
479
+ arguments=args,
480
+ elapsed_ms=elapsed_ms,
481
+ success=result.success,
482
+ result_summary=str(result.result)[:200] if result.success else None,
483
+ error_message=str(result.final_error)[:500] if not result.success else None,
484
+ correlation_id=correlation_id,
485
+ )
486
+
487
+ if result.success:
488
+ progress.complete(result.result)
489
+
490
+ response: Dict[str, Any] = {
491
+ **result.result,
492
+ "_retry_metadata": {
493
+ "correlation_id": correlation_id,
494
+ "attempts": result.attempt_count,
495
+ "total_time_ms": round(elapsed_ms, 2),
496
+ "retries": [a.error_type for a in result.attempts],
497
+ },
498
+ }
499
+
500
+ # Check if result contains isError: true (provider returned error in response)
501
+ # This is different from an exception - the provider executed successfully
502
+ # but returned an error response (e.g., division by zero)
503
+ if result.result.get("isError"):
504
+ # Extract error message from content
505
+ error_text = _extract_error_text(result.result.get("content", []))
506
+ error_classification = ErrorClassifier.classify(Exception(error_text))
507
+
508
+ response["_retry_metadata"]["final_error_reason"] = error_classification["final_error_reason"]
509
+ response["_retry_metadata"]["recovery_hints"] = error_classification["recovery_hints"]
510
+
511
+ # Include progress events if requested
512
+ if include_progress:
513
+ response["_progress"] = progress_events
514
+
515
+ return response
516
+ else:
517
+ # Classify the error for enriched metadata
518
+ error_classification = ErrorClassifier.classify(result.final_error)
519
+
520
+ # Map error to HangarError for better UX
521
+ hangar_error = map_exception_to_hangar_error(
522
+ result.final_error,
523
+ provider=provider,
524
+ operation=tool,
525
+ context={
526
+ "arguments": args,
527
+ "timeout": timeout,
528
+ "attempts": result.attempt_count,
529
+ "progress": progress_events,
530
+ },
531
+ )
532
+ progress.fail(hangar_error)
533
+
534
+ # Return error response with enriched metadata (don't raise)
535
+ error_response: Dict[str, Any] = {
536
+ "content": f"Error executing tool {tool}: {str(result.final_error)}",
537
+ "isError": True,
538
+ "_retry_metadata": {
539
+ "correlation_id": correlation_id,
540
+ "attempts": result.attempt_count,
541
+ "total_time_ms": round(elapsed_ms, 2),
542
+ "retries": [a.error_type for a in result.attempts],
543
+ "final_error_reason": error_classification["final_error_reason"],
544
+ "recovery_hints": error_classification["recovery_hints"],
545
+ },
546
+ }
547
+
548
+ # Include progress events if requested
549
+ if include_progress:
550
+ error_response["_progress"] = progress_events
551
+
552
+ return error_response
553
+
554
+ @mcp.tool(name="registry_invoke_ex")
555
+ @mcp_tool_wrapper(
556
+ tool_name="registry_invoke_ex",
557
+ rate_limit_key=key_registry_invoke,
558
+ check_rate_limit=check_rate_limit,
559
+ validate=chain_validators(
560
+ lambda provider, tool, arguments=None, timeout=DEFAULT_TIMEOUT_SECONDS, **kw: validate_provider_id_input(
561
+ provider
562
+ ),
563
+ lambda provider, tool, arguments=None, timeout=DEFAULT_TIMEOUT_SECONDS, **kw: validate_tool_name_input(
564
+ tool
565
+ ),
566
+ lambda provider, tool, arguments=None, timeout=DEFAULT_TIMEOUT_SECONDS, **kw: validate_arguments_input(
567
+ arguments or {}
568
+ ),
569
+ lambda provider, tool, arguments=None, timeout=DEFAULT_TIMEOUT_SECONDS, **kw: validate_timeout_input(
570
+ timeout
571
+ ),
572
+ ),
573
+ error_mapper=tool_error_mapper,
574
+ on_error=lambda exc, ctx_dict: tool_error_hook(exc, ctx_dict),
575
+ )
576
+ def registry_invoke_ex(
577
+ provider: str,
578
+ tool: str,
579
+ arguments: Optional[dict] = None,
580
+ timeout: float = DEFAULT_TIMEOUT_SECONDS,
581
+ max_retries: int = 3,
582
+ retry_on_error: bool = True,
583
+ correlation_id: Optional[str] = None,
584
+ ) -> dict:
585
+ """
586
+ Invoke a tool with automatic retry on transient failures.
587
+
588
+ Extended version of registry_invoke with:
589
+ - Automatic retry with exponential backoff
590
+ - Rich error messages with recovery hints
591
+ - Retry metadata in response
592
+ - Correlation ID for tracing
593
+
594
+ Args:
595
+ provider: Provider ID or Group ID
596
+ tool: Tool name
597
+ arguments: Tool arguments (default: empty dict)
598
+ timeout: Timeout in seconds (default: 30.0)
599
+ max_retries: Maximum retry attempts (default: 3)
600
+ retry_on_error: Whether to retry on transient errors (default: True)
601
+ correlation_id: Optional correlation ID for tracing (auto-generated UUID if not provided)
602
+
603
+ Returns:
604
+ Tool result with _retry_metadata field containing:
605
+ - correlation_id: Trace ID for this operation
606
+ - attempts: Number of attempts made
607
+ - total_time_ms: Total execution time
608
+ - retries: List of error types from retries
609
+ - final_error_reason: (on error) Classification of the error
610
+ - recovery_hints: (on error) Actionable steps to resolve
611
+
612
+ Raises:
613
+ HangarError: Rich error with recovery hints
614
+ """
615
+ policy = None
616
+ if retry_on_error:
617
+ policy = RetryPolicy(max_attempts=max_retries)
618
+ else:
619
+ policy = RetryPolicy(max_attempts=1)
620
+
621
+ return _invoke_with_full_retry(
622
+ provider=provider,
623
+ tool=tool,
624
+ arguments=arguments or {},
625
+ timeout=timeout,
626
+ retry_policy=policy,
627
+ correlation_id=correlation_id,
628
+ )
629
+
630
@mcp.tool(name="registry_invoke_stream")
async def registry_invoke_stream(
    provider: str,
    tool: str,
    ctx: Context,
    arguments: Optional[dict] = None,
    timeout: float = DEFAULT_TIMEOUT_SECONDS,
    max_retries: int = 3,
    correlation_id: Optional[str] = None,
) -> dict:
    """
    Invoke a tool with real-time progress notifications.

    This tool sends MCP progress notifications during execution,
    allowing the model to see progress in real-time:
    - Cold start detection
    - Provider launching
    - Tool execution
    - Retry attempts

    Args:
        provider: Provider ID or Group ID
        tool: Tool name
        ctx: MCP Context (injected automatically)
        arguments: Tool arguments (default: empty dict)
        timeout: Timeout in seconds (default: 30.0)
        max_retries: Maximum retry attempts (default: 3)
        correlation_id: Optional correlation ID for tracing (auto-generated if not provided)

    Returns:
        Tool result with _retry_metadata and _progress fields

    Note:
        Progress is sent via MCP notifications. The model receives
        updates like "cold_start: Provider is cold, launching..."
        during execution.
    """
    from ...logging_config import get_logger

    logger = get_logger(__name__)

    app_ctx = get_context()
    args = arguments or {}
    correlation_id = correlation_id or str(uuid.uuid4())

    # Get retry policy
    policy = RetryPolicy(max_attempts=max_retries)

    # Track progress events - populated synchronously
    progress_events = []
    progress_step = [0]  # Mutable counter for closure
    total_steps = 5  # Estimate: cold_start, launch, execute, process, complete
    # Keep strong references to scheduled notification tasks so they are not
    # garbage-collected mid-flight; they are awaited before returning.
    pending_notifications = []

    def sync_progress_callback(stage: str, message: str, elapsed_ms: float):
        """Sync callback that collects progress and schedules MCP notification."""
        progress_step[0] += 1
        event = {
            "stage": stage,
            "message": message,
            "elapsed_ms": round(elapsed_ms, 2),
        }
        # Always append synchronously so _progress is complete even when
        # async notification delivery fails or is unavailable.
        progress_events.append(event)

        # Log to server
        logger.info(
            "operation_progress",
            provider=provider,
            tool=tool,
            correlation_id=correlation_id,
            **event,
        )

        # Schedule MCP notification (fire-and-forget; awaited before return)
        async def send_mcp_notification():
            try:
                await ctx.report_progress(
                    progress=progress_step[0],
                    total=total_steps,
                    message=f"[{stage}] {message}",
                )
            except Exception as e:
                logger.debug("mcp_progress_notification_failed", error=str(e))

        # asyncio.get_event_loop() is deprecated outside a running loop
        # (3.10+); get_running_loop() raises RuntimeError when no loop is
        # running, which is exactly the "skip notifications" case.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            pass  # No event loop - skip MCP notifications
        else:
            pending_notifications.append(asyncio.create_task(send_mcp_notification()))

    # Create progress tracker
    start_time = time.time()

    progress = create_progress_tracker(
        provider=provider,
        operation=tool,
        callback=sync_progress_callback,
        correlation_id=correlation_id,
    )

    # Report initial state - check cold start.  bool(...) so a missing
    # provider yields False rather than None.
    provider_obj = app_ctx.get_provider(provider) if app_ctx.provider_exists(provider) else None
    is_cold_start = bool(provider_obj and provider_obj.state.value == "cold")

    if is_cold_start:
        progress.report(ProgressStage.COLD_START, "Provider is cold, launching...")
    else:
        progress.report(ProgressStage.READY, "Starting operation...")

    def do_invoke():
        # Groups take precedence over individual providers for the same ID.
        if app_ctx.group_exists(provider):
            return _invoke_on_group(provider, tool, args, timeout, progress)

        if not app_ctx.provider_exists(provider):
            available = list(app_ctx.repository.get_all().keys())
            raise HangarProviderNotFoundError(
                message=f"Provider '{provider}' not found",
                provider=provider,
                operation="invoke",
                available_providers=available,
            )

        return _invoke_on_provider(provider, tool, args, timeout, progress)

    # Execute with retry (sync, but progress is reported async)
    result = retry_sync(
        operation=do_invoke,
        policy=policy,
        provider=provider,
        operation_name=tool,
        on_retry=lambda attempt, err, delay: progress.report(
            ProgressStage.RETRYING,
            f"Retry {attempt}/{policy.max_attempts} in {delay:.1f}s: {str(err)[:50]}",
        ),
    )

    elapsed_ms = (time.time() - start_time) * 1000

    # Flush any scheduled progress notifications before building the
    # response; return_exceptions keeps a failed send from raising here.
    if pending_notifications:
        await asyncio.gather(*pending_notifications, return_exceptions=True)

    if result.success:
        progress.complete(result.result)

        # Final progress notification.  Guarded like every other send: a
        # failed notification must not discard a successful tool result.
        try:
            await ctx.report_progress(
                progress=total_steps,
                total=total_steps,
                message=f"[complete] Operation completed in {elapsed_ms:.0f}ms",
            )
        except Exception as e:
            logger.debug("mcp_progress_notification_failed", error=str(e))

        response = {
            **result.result,
            "_retry_metadata": {
                "correlation_id": correlation_id,
                "attempts": result.attempt_count,
                "total_time_ms": round(elapsed_ms, 2),
                "retries": [a.error_type for a in result.attempts],
            },
            "_progress": progress_events,
        }

        # Check if result contains isError: true (provider returned error in response)
        if result.result.get("isError"):
            from ...errors import ErrorClassifier

            error_text = _extract_error_text(result.result.get("content", []))
            error_classification = ErrorClassifier.classify(Exception(error_text))

            response["_retry_metadata"]["final_error_reason"] = error_classification["final_error_reason"]
            response["_retry_metadata"]["recovery_hints"] = error_classification["recovery_hints"]

        return response
    else:
        from ...errors import ErrorClassifier

        # Classify the error for enriched metadata
        error_classification = ErrorClassifier.classify(result.final_error)

        hangar_error = map_exception_to_hangar_error(
            result.final_error,
            provider=provider,
            operation=tool,
            context={
                "arguments": args,
                "timeout": timeout,
                "attempts": result.attempt_count,
                "progress": progress_events,
            },
        )
        progress.fail(hangar_error)

        # Return error response with enriched metadata (consistent with invoke_ex)
        return {
            "content": f"Error executing tool {tool}: {str(result.final_error)}",
            "isError": True,
            "_retry_metadata": {
                "correlation_id": correlation_id,
                "attempts": result.attempt_count,
                "total_time_ms": round(elapsed_ms, 2),
                "retries": [a.error_type for a in result.attempts],
                "final_error_reason": error_classification["final_error_reason"],
                "recovery_hints": error_classification["recovery_hints"],
            },
            "_progress": progress_events,
        }
837
@mcp.tool(name="registry_details")
@mcp_tool_wrapper(
    tool_name="registry_details",
    rate_limit_key=lambda provider: f"registry_details:{provider}",
    check_rate_limit=check_rate_limit,
    validate=validate_provider_id_input,
    error_mapper=tool_error_mapper,
    on_error=lambda exc, ctx: tool_error_hook(exc, ctx),
)
def registry_details(provider: str) -> dict:
    """
    Get detailed information about a provider or group.

    This is a QUERY operation - it has no side effects.

    Args:
        provider: Provider ID or Group ID

    Returns:
        Dictionary with full provider/group details

    Raises:
        ValueError: If provider ID is unknown or invalid
    """
    registry_ctx = get_context()

    # Group IDs are resolved first; a group's status dict is the full answer.
    if registry_ctx.group_exists(provider):
        group = registry_ctx.get_group(provider)
        return group.to_status_dict()

    # Fail fast on IDs that match neither a group nor a provider.
    if not registry_ctx.provider_exists(provider):
        raise ValueError(f"unknown_provider: {provider}")

    details = registry_ctx.query_bus.execute(GetProviderQuery(provider_id=provider))
    return details.to_dict()
872
@mcp.tool(name="registry_warm")
@mcp_tool_wrapper(
    tool_name="registry_warm",
    rate_limit_key=lambda providers="": "registry_warm",
    check_rate_limit=check_rate_limit,
    validate=None,
    error_mapper=tool_error_mapper,
    on_error=lambda exc, ctx_dict: tool_error_hook(exc, ctx_dict),
)
def registry_warm(providers: Optional[str] = None) -> dict:
    """
    Pre-start (warm up) providers to avoid cold start latency.

    Starts the specified providers in advance so they're ready
    when you need them. This eliminates cold start delays.

    Args:
        providers: Comma-separated list of provider IDs to warm up.
                   If empty, warms all providers.

    Returns:
        Dictionary with status for each provider:
        - warmed: List of successfully started providers
        - already_warm: List of providers that were already running
        - failed: List of providers that failed to start

    Example:
        registry_warm("math,sqlite")  # Warm specific providers
        registry_warm()               # Warm all providers
    """
    hangar_ctx = get_context()

    # Resolve the target set: explicit comma-separated IDs, or every
    # provider known to the repository when none are given.
    if providers:
        targets = [token.strip() for token in providers.split(",") if token.strip()]
    else:
        targets = list(hangar_ctx.repository.get_all().keys())

    warmed = []
    already_warm = []
    failed = []

    for pid in targets:
        # Groups cannot be warmed directly - skip them.
        if hangar_ctx.group_exists(pid):
            continue

        if not hangar_ctx.provider_exists(pid):
            failed.append({"id": pid, "error": "Provider not found"})
            continue

        try:
            current = hangar_ctx.get_provider(pid)
            if current and current.state.value == "ready":
                already_warm.append(pid)
                continue
            # Cold or unknown state: ask the command bus to start it.
            hangar_ctx.command_bus.send(StartProviderCommand(provider_id=pid))
            warmed.append(pid)
        except Exception as exc:
            # Truncate error text so one bad provider can't bloat the response.
            failed.append({"id": pid, "error": str(exc)[:100]})

    return {
        "warmed": warmed,
        "already_warm": already_warm,
        "failed": failed,
        "summary": f"Warmed {len(warmed)} providers, {len(already_warm)} already warm, {len(failed)} failed",
    }