mcp-hangar 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. mcp_hangar/__init__.py +139 -0
  2. mcp_hangar/application/__init__.py +1 -0
  3. mcp_hangar/application/commands/__init__.py +67 -0
  4. mcp_hangar/application/commands/auth_commands.py +118 -0
  5. mcp_hangar/application/commands/auth_handlers.py +296 -0
  6. mcp_hangar/application/commands/commands.py +59 -0
  7. mcp_hangar/application/commands/handlers.py +189 -0
  8. mcp_hangar/application/discovery/__init__.py +21 -0
  9. mcp_hangar/application/discovery/discovery_metrics.py +283 -0
  10. mcp_hangar/application/discovery/discovery_orchestrator.py +497 -0
  11. mcp_hangar/application/discovery/lifecycle_manager.py +315 -0
  12. mcp_hangar/application/discovery/security_validator.py +414 -0
  13. mcp_hangar/application/event_handlers/__init__.py +50 -0
  14. mcp_hangar/application/event_handlers/alert_handler.py +191 -0
  15. mcp_hangar/application/event_handlers/audit_handler.py +203 -0
  16. mcp_hangar/application/event_handlers/knowledge_base_handler.py +120 -0
  17. mcp_hangar/application/event_handlers/logging_handler.py +69 -0
  18. mcp_hangar/application/event_handlers/metrics_handler.py +152 -0
  19. mcp_hangar/application/event_handlers/persistent_audit_store.py +217 -0
  20. mcp_hangar/application/event_handlers/security_handler.py +604 -0
  21. mcp_hangar/application/mcp/tooling.py +158 -0
  22. mcp_hangar/application/ports/__init__.py +9 -0
  23. mcp_hangar/application/ports/observability.py +237 -0
  24. mcp_hangar/application/queries/__init__.py +52 -0
  25. mcp_hangar/application/queries/auth_handlers.py +237 -0
  26. mcp_hangar/application/queries/auth_queries.py +118 -0
  27. mcp_hangar/application/queries/handlers.py +227 -0
  28. mcp_hangar/application/read_models/__init__.py +11 -0
  29. mcp_hangar/application/read_models/provider_views.py +139 -0
  30. mcp_hangar/application/sagas/__init__.py +11 -0
  31. mcp_hangar/application/sagas/group_rebalance_saga.py +137 -0
  32. mcp_hangar/application/sagas/provider_failover_saga.py +266 -0
  33. mcp_hangar/application/sagas/provider_recovery_saga.py +172 -0
  34. mcp_hangar/application/services/__init__.py +9 -0
  35. mcp_hangar/application/services/provider_service.py +208 -0
  36. mcp_hangar/application/services/traced_provider_service.py +211 -0
  37. mcp_hangar/bootstrap/runtime.py +328 -0
  38. mcp_hangar/context.py +178 -0
  39. mcp_hangar/domain/__init__.py +117 -0
  40. mcp_hangar/domain/contracts/__init__.py +57 -0
  41. mcp_hangar/domain/contracts/authentication.py +225 -0
  42. mcp_hangar/domain/contracts/authorization.py +229 -0
  43. mcp_hangar/domain/contracts/event_store.py +178 -0
  44. mcp_hangar/domain/contracts/metrics_publisher.py +59 -0
  45. mcp_hangar/domain/contracts/persistence.py +383 -0
  46. mcp_hangar/domain/contracts/provider_runtime.py +146 -0
  47. mcp_hangar/domain/discovery/__init__.py +20 -0
  48. mcp_hangar/domain/discovery/conflict_resolver.py +267 -0
  49. mcp_hangar/domain/discovery/discovered_provider.py +185 -0
  50. mcp_hangar/domain/discovery/discovery_service.py +412 -0
  51. mcp_hangar/domain/discovery/discovery_source.py +192 -0
  52. mcp_hangar/domain/events.py +433 -0
  53. mcp_hangar/domain/exceptions.py +525 -0
  54. mcp_hangar/domain/model/__init__.py +70 -0
  55. mcp_hangar/domain/model/aggregate.py +58 -0
  56. mcp_hangar/domain/model/circuit_breaker.py +152 -0
  57. mcp_hangar/domain/model/event_sourced_api_key.py +413 -0
  58. mcp_hangar/domain/model/event_sourced_provider.py +423 -0
  59. mcp_hangar/domain/model/event_sourced_role_assignment.py +268 -0
  60. mcp_hangar/domain/model/health_tracker.py +183 -0
  61. mcp_hangar/domain/model/load_balancer.py +185 -0
  62. mcp_hangar/domain/model/provider.py +810 -0
  63. mcp_hangar/domain/model/provider_group.py +656 -0
  64. mcp_hangar/domain/model/tool_catalog.py +105 -0
  65. mcp_hangar/domain/policies/__init__.py +19 -0
  66. mcp_hangar/domain/policies/provider_health.py +187 -0
  67. mcp_hangar/domain/repository.py +249 -0
  68. mcp_hangar/domain/security/__init__.py +85 -0
  69. mcp_hangar/domain/security/input_validator.py +710 -0
  70. mcp_hangar/domain/security/rate_limiter.py +387 -0
  71. mcp_hangar/domain/security/roles.py +237 -0
  72. mcp_hangar/domain/security/sanitizer.py +387 -0
  73. mcp_hangar/domain/security/secrets.py +501 -0
  74. mcp_hangar/domain/services/__init__.py +20 -0
  75. mcp_hangar/domain/services/audit_service.py +376 -0
  76. mcp_hangar/domain/services/image_builder.py +328 -0
  77. mcp_hangar/domain/services/provider_launcher.py +1046 -0
  78. mcp_hangar/domain/value_objects.py +1138 -0
  79. mcp_hangar/errors.py +818 -0
  80. mcp_hangar/fastmcp_server.py +1105 -0
  81. mcp_hangar/gc.py +134 -0
  82. mcp_hangar/infrastructure/__init__.py +79 -0
  83. mcp_hangar/infrastructure/async_executor.py +133 -0
  84. mcp_hangar/infrastructure/auth/__init__.py +37 -0
  85. mcp_hangar/infrastructure/auth/api_key_authenticator.py +388 -0
  86. mcp_hangar/infrastructure/auth/event_sourced_store.py +567 -0
  87. mcp_hangar/infrastructure/auth/jwt_authenticator.py +360 -0
  88. mcp_hangar/infrastructure/auth/middleware.py +340 -0
  89. mcp_hangar/infrastructure/auth/opa_authorizer.py +243 -0
  90. mcp_hangar/infrastructure/auth/postgres_store.py +659 -0
  91. mcp_hangar/infrastructure/auth/projections.py +366 -0
  92. mcp_hangar/infrastructure/auth/rate_limiter.py +311 -0
  93. mcp_hangar/infrastructure/auth/rbac_authorizer.py +323 -0
  94. mcp_hangar/infrastructure/auth/sqlite_store.py +624 -0
  95. mcp_hangar/infrastructure/command_bus.py +112 -0
  96. mcp_hangar/infrastructure/discovery/__init__.py +110 -0
  97. mcp_hangar/infrastructure/discovery/docker_source.py +289 -0
  98. mcp_hangar/infrastructure/discovery/entrypoint_source.py +249 -0
  99. mcp_hangar/infrastructure/discovery/filesystem_source.py +383 -0
  100. mcp_hangar/infrastructure/discovery/kubernetes_source.py +247 -0
  101. mcp_hangar/infrastructure/event_bus.py +260 -0
  102. mcp_hangar/infrastructure/event_sourced_repository.py +443 -0
  103. mcp_hangar/infrastructure/event_store.py +396 -0
  104. mcp_hangar/infrastructure/knowledge_base/__init__.py +259 -0
  105. mcp_hangar/infrastructure/knowledge_base/contracts.py +202 -0
  106. mcp_hangar/infrastructure/knowledge_base/memory.py +177 -0
  107. mcp_hangar/infrastructure/knowledge_base/postgres.py +545 -0
  108. mcp_hangar/infrastructure/knowledge_base/sqlite.py +513 -0
  109. mcp_hangar/infrastructure/metrics_publisher.py +36 -0
  110. mcp_hangar/infrastructure/observability/__init__.py +10 -0
  111. mcp_hangar/infrastructure/observability/langfuse_adapter.py +534 -0
  112. mcp_hangar/infrastructure/persistence/__init__.py +33 -0
  113. mcp_hangar/infrastructure/persistence/audit_repository.py +371 -0
  114. mcp_hangar/infrastructure/persistence/config_repository.py +398 -0
  115. mcp_hangar/infrastructure/persistence/database.py +333 -0
  116. mcp_hangar/infrastructure/persistence/database_common.py +330 -0
  117. mcp_hangar/infrastructure/persistence/event_serializer.py +280 -0
  118. mcp_hangar/infrastructure/persistence/event_upcaster.py +166 -0
  119. mcp_hangar/infrastructure/persistence/in_memory_event_store.py +150 -0
  120. mcp_hangar/infrastructure/persistence/recovery_service.py +312 -0
  121. mcp_hangar/infrastructure/persistence/sqlite_event_store.py +386 -0
  122. mcp_hangar/infrastructure/persistence/unit_of_work.py +409 -0
  123. mcp_hangar/infrastructure/persistence/upcasters/README.md +13 -0
  124. mcp_hangar/infrastructure/persistence/upcasters/__init__.py +7 -0
  125. mcp_hangar/infrastructure/query_bus.py +153 -0
  126. mcp_hangar/infrastructure/saga_manager.py +401 -0
  127. mcp_hangar/logging_config.py +209 -0
  128. mcp_hangar/metrics.py +1007 -0
  129. mcp_hangar/models.py +31 -0
  130. mcp_hangar/observability/__init__.py +54 -0
  131. mcp_hangar/observability/health.py +487 -0
  132. mcp_hangar/observability/metrics.py +319 -0
  133. mcp_hangar/observability/tracing.py +433 -0
  134. mcp_hangar/progress.py +542 -0
  135. mcp_hangar/retry.py +613 -0
  136. mcp_hangar/server/__init__.py +120 -0
  137. mcp_hangar/server/__main__.py +6 -0
  138. mcp_hangar/server/auth_bootstrap.py +340 -0
  139. mcp_hangar/server/auth_cli.py +335 -0
  140. mcp_hangar/server/auth_config.py +305 -0
  141. mcp_hangar/server/bootstrap.py +735 -0
  142. mcp_hangar/server/cli.py +161 -0
  143. mcp_hangar/server/config.py +224 -0
  144. mcp_hangar/server/context.py +215 -0
  145. mcp_hangar/server/http_auth_middleware.py +165 -0
  146. mcp_hangar/server/lifecycle.py +467 -0
  147. mcp_hangar/server/state.py +117 -0
  148. mcp_hangar/server/tools/__init__.py +16 -0
  149. mcp_hangar/server/tools/discovery.py +186 -0
  150. mcp_hangar/server/tools/groups.py +75 -0
  151. mcp_hangar/server/tools/health.py +301 -0
  152. mcp_hangar/server/tools/provider.py +939 -0
  153. mcp_hangar/server/tools/registry.py +320 -0
  154. mcp_hangar/server/validation.py +113 -0
  155. mcp_hangar/stdio_client.py +229 -0
  156. mcp_hangar-0.2.0.dist-info/METADATA +347 -0
  157. mcp_hangar-0.2.0.dist-info/RECORD +160 -0
  158. mcp_hangar-0.2.0.dist-info/WHEEL +4 -0
  159. mcp_hangar-0.2.0.dist-info/entry_points.txt +2 -0
  160. mcp_hangar-0.2.0.dist-info/licenses/LICENSE +21 -0
mcp_hangar/errors.py ADDED
@@ -0,0 +1,818 @@
1
+ """Human-readable error classes with recovery hints.
2
+
3
+ This module provides rich error types that include:
4
+
5
+ - Clear user-facing messages
6
+ - Technical details for debugging
7
+ - Actionable recovery hints
8
+ - Related log references
9
+
10
+ Error output example::
11
+
12
+ ProviderProtocolError: SQLite provider returned invalid response
13
+ ↳ Provider: sqlite
14
+ ↳ Operation: query
15
+ ↳ Details: Expected JSON object, received plain text
16
+
17
+ 💡 Recovery steps:
18
+ 1. Retry the operation (often transient)
19
+ 2. Check provider logs: registry_details('sqlite')
20
+ 3. If persistent, file bug report
21
+
22
+ See docs/guides/UX_IMPROVEMENTS.md for usage examples.
23
+ """
24
+
25
+ from dataclasses import dataclass, field
26
+ from typing import Any, Callable, Dict, List, Optional
27
+
28
+
29
@dataclass
class HangarError(Exception):
    """Root of the mcp-hangar error hierarchy.

    Bundles a user-facing message with optional technical context,
    recovery hints and log/issue references. ``str()`` renders all of it
    as a multi-line report; ``to_dict()`` exposes the same data as a plain
    dictionary.

    NOTE(review): ``@dataclass`` generates ``__eq__`` for this class, which
    leaves instances unhashable -- confirm nothing keeps these errors in
    sets or uses them as dict keys.
    """

    # --- User-facing -------------------------------------------------------
    #: Clear, non-technical explanation of what went wrong.
    message: str

    #: Actionable steps the user can take to resolve the issue.
    recovery_hints: List[str] = field(default_factory=list)

    # --- Technical context -------------------------------------------------
    #: The provider that caused the error.
    provider: str = ""

    #: The operation that was being performed.
    operation: str = ""

    #: Technical error details for debugging.
    technical_details: str = ""

    # --- Debugging ---------------------------------------------------------
    #: Path to relevant log entries (e.g. '/logs/mcp-hangar.log:580').
    related_logs: Optional[str] = None

    #: Link to a known issue or documentation.
    issue_url: Optional[str] = None

    #: The lower-level exception this error was built from, if any.
    original_exception: Optional[Exception] = None

    #: Additional context data for debugging.
    context: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # The dataclass-generated __init__ never calls Exception.__init__,
        # so populate ``args`` with the message here.
        super().__init__(self.message)

    def __str__(self) -> str:
        """Render the error, its context lines and any recovery hints."""
        lines = [f"\n{self.__class__.__name__}: {self.message}"]

        # Each context line is emitted only when its field is non-empty.
        context_lines = (
            (self.provider, f" ↳ Provider: {self.provider}"),
            (self.operation, f" ↳ Operation: {self.operation}"),
            (self.technical_details, f" ↳ Details: {self.technical_details}"),
        )
        lines.extend(text for present, text in context_lines if present)

        if self.recovery_hints:
            lines += ["", "💡 Recovery steps:"]
            lines += [f" {number}. {hint}" for number, hint in enumerate(self.recovery_hints, 1)]

        if self.related_logs:
            lines.append(f"\n📋 Related logs: {self.related_logs}")

        if self.issue_url:
            lines.append(f"🔗 Known issue: {self.issue_url}")

        return "\n".join(lines)

    def to_dict(self) -> Dict[str, Any]:
        """Return the error as a plain dictionary for API responses.

        ``original_exception`` is not included in the result.
        """
        payload: Dict[str, Any] = {"error_type": self.__class__.__name__}
        exported = (
            "message",
            "provider",
            "operation",
            "technical_details",
            "recovery_hints",
            "related_logs",
            "issue_url",
            "context",
        )
        for attribute in exported:
            payload[attribute] = getattr(self, attribute)
        return payload
113
+
114
+
115
@dataclass
class TransientError(HangarError):
    """A temporary failure where a retry may succeed.

    Typical causes:
    - Network glitch
    - Race condition during startup
    - Malformed JSON response (often recovers)

    ``is_retryable()`` in this module reports the ``retryable`` flag for
    instances of this class.
    """

    #: Whether the operation may be attempted again.
    retryable: bool = True

    #: Suggested pause before the next attempt, in seconds.
    suggested_delay: float = 1.0
132
+
133
+
134
@dataclass
class ProviderProtocolError(HangarError):
    """A provider sent a response that violates the MCP protocol.

    Raised for invalid responses such as malformed JSON or an unexpected
    data format.
    """

    #: Preview of the invalid response (truncated for safety).
    raw_response: Optional[str] = None

    def __post_init__(self):
        # Supply default hints only when the caller did not provide any.
        if not self.recovery_hints:
            logs_hint = f"Check provider logs: registry_details('{self.provider}')"
            self.recovery_hints = [
                "Retry the operation (often transient)",
                logs_hint,
                "If persistent, file bug report with raw response",
            ]
        super().__post_init__()
153
+
154
+
155
@dataclass
class ProviderCrashError(HangarError):
    """A provider process terminated unexpectedly.

    Covers a process that died or was killed, whether from an internal
    error, resource limits, or an idle timeout.
    """

    #: Process exit code, if available.
    exit_code: Optional[int] = None

    #: Name of the terminating signal (e.g. SIGKILL), if killed by one.
    signal_name: Optional[str] = None

    #: How long the provider was idle before shutdown, if applicable.
    idle_duration_s: Optional[float] = None

    def __post_init__(self):
        # Default hints depend on whether this was an idle shutdown.
        if not self.recovery_hints:
            restart_hint = "Provider will auto-restart on next use"
            if self.idle_duration_s is None:
                self.recovery_hints = [
                    restart_hint,
                    f"Check provider logs: registry_details('{self.provider}')",
                    "Check for memory/resource issues in container",
                ]
            else:
                self.recovery_hints = [
                    f"Provider was idle for {self.idle_duration_s:.0f}s and forced shutdown",
                    restart_hint,
                    "This is normal behavior for idle providers",
                    "If frequent, increase idle_ttl_s in config",
                ]
        super().__post_init__()
186
+
187
+
188
@dataclass
class NetworkError(HangarError):
    """A network connectivity problem.

    Raised when the hangar cannot reach a remote provider, for example
    because of DNS issues, firewall rules, or a network outage.
    """

    #: The hostname that was unreachable.
    hostname: Optional[str] = None

    #: Network error code (e.g. EAI_AGAIN for DNS).
    error_code: Optional[str] = None

    def __post_init__(self):
        if not self.recovery_hints:
            # Host-specific checks are only meaningful when a hostname is known.
            host_checks = (
                [
                    f"Check network connectivity: ping {self.hostname}",
                    f"Verify DNS resolution: nslookup {self.hostname}",
                ]
                if self.hostname
                else []
            )
            self.recovery_hints = [
                *host_checks,
                "Try with longer timeout: timeout=60",
                "Check firewall/proxy settings",
            ]
        super().__post_init__()
212
+
213
+
214
@dataclass
class ConfigurationError(HangarError):
    """A problem with the user's configuration.

    Raised for an issue with the configuration file, a missing required
    setting, or an invalid value.
    """

    #: Path to the configuration file.
    config_path: Optional[str] = None

    #: Name of the problematic configuration field.
    field_name: Optional[str] = None

    def __post_init__(self):
        if not self.recovery_hints:
            steps: List[str] = []
            # Targeted hints come first, and only when the detail is known.
            if self.config_path:
                steps.append(f"Check config file at: {self.config_path}")
            if self.field_name:
                steps.append(f"Review the '{self.field_name}' setting")
            steps += [
                "Use registry_discover() to auto-detect providers",
                "Check example config: docs/configuration.md",
            ]
            self.recovery_hints = steps
        super().__post_init__()
239
+
240
+
241
@dataclass
class ProviderNotFoundError(HangarError):
    """The requested provider ID does not exist in the registry."""

    #: Provider IDs that are available.
    available_providers: List[str] = field(default_factory=list)

    def __post_init__(self):
        if not self.recovery_hints:
            steps = ["Use registry_list() to see available providers"]
            if self.available_providers:
                similar = self._find_similar()
                if similar:
                    steps.append(f"Did you mean: {similar}?")
            steps.append("Add provider to config.yaml")
            self.recovery_hints = steps
        super().__post_init__()

    def _find_similar(self) -> Optional[str]:
        """Pick a candidate for the 'did you mean' hint.

        Only names that contain the requested ID, or are contained in it
        (case-insensitively), are considered. Among those, the one sharing
        the most distinct characters with the requested ID wins; ties keep
        the earliest candidate.
        """
        if not (self.provider and self.available_providers):
            return None

        wanted = self.provider.lower()
        wanted_chars = set(wanted)
        winner: Optional[str] = None
        top_overlap = 0

        for candidate in self.available_providers:
            folded = candidate.lower()
            if wanted not in folded and folded not in wanted:
                continue
            overlap = len(wanted_chars & set(folded))
            if overlap > top_overlap:
                winner, top_overlap = candidate, overlap

        return winner
283
+
284
+
285
@dataclass
class ToolNotFoundError(HangarError):
    """The requested tool does not exist in the provider's catalog."""

    #: The tool name that was not found.
    tool_name: str = ""

    #: Tool names that are available.
    available_tools: List[str] = field(default_factory=list)

    def __post_init__(self):
        if not self.recovery_hints:
            steps = [f"Use registry_tools('{self.provider}') to see available tools"]
            if self.available_tools:
                similar = self._find_similar()
                if similar:
                    steps.append(f"Did you mean: {similar}?")
            self.recovery_hints = steps
        super().__post_init__()

    def _find_similar(self) -> Optional[str]:
        """Return the first available tool whose name contains, or is
        contained in, the requested name (case-insensitive), else ``None``."""
        if not (self.tool_name and self.available_tools):
            return None

        wanted = self.tool_name.lower()
        return next(
            (
                candidate
                for candidate in self.available_tools
                if wanted in candidate.lower() or candidate.lower() in wanted
            ),
            None,
        )
320
+
321
+
322
@dataclass
class TimeoutError(HangarError):
    """An operation ran longer than its allowed timeout.

    NOTE: this class shadows the builtin ``TimeoutError`` inside this
    module; the name is kept because callers import it.
    """

    #: The timeout that was exceeded, in seconds.
    timeout_seconds: float = 0.0

    #: How long the operation ran before timing out, in seconds.
    elapsed_seconds: float = 0.0

    def __post_init__(self):
        if not self.recovery_hints:
            import math

            limit = self.timeout_seconds
            # Suggest a concrete value only when the exceeded timeout is a
            # usable positive number. Rounding up avoids recommending
            # ``timeout=0`` for sub-second timeouts, and the guard keeps a
            # missing or invalid value from raising while the error is built.
            if (
                isinstance(limit, (int, float))
                and not isinstance(limit, bool)
                and math.isfinite(limit)
                and limit > 0
            ):
                increase_hint = f"Increase timeout: timeout={math.ceil(limit * 2)}"
            else:
                increase_hint = "Increase timeout"

            self.recovery_hints = [
                increase_hint,
                "Check if provider is overloaded",
                f"Check provider health: registry_details('{self.provider}')",
            ]
        super().__post_init__()
343
+
344
+
345
@dataclass
class RateLimitError(HangarError):
    """Too many requests were made within the rate-limit window."""

    #: The rate limit that was exceeded.
    limit: int = 0

    #: The time window for the rate limit, in seconds.
    window_seconds: int = 0

    #: Suggested wait time before retrying, in seconds.
    retry_after_seconds: float = 0.0

    def __post_init__(self):
        # Supply default hints only when the caller did not provide any.
        if not self.recovery_hints:
            wait_hint = f"Wait {self.retry_after_seconds:.1f}s before retrying"
            self.recovery_hints = [
                wait_hint,
                "Reduce request frequency",
                "Configure higher rate limits in config.yaml",
            ]
        super().__post_init__()
369
+
370
+
371
@dataclass
class ProviderDegradedError(HangarError):
    """The provider is degraded: it failed repeatedly and is in a backoff period."""

    #: Number of consecutive failures.
    consecutive_failures: int = 0

    #: Time remaining in the backoff period, in seconds.
    backoff_remaining_s: float = 0.0

    def __post_init__(self):
        # Supply default hints only when the caller did not provide any.
        if not self.recovery_hints:
            wait_hint = f"Wait {self.backoff_remaining_s:.1f}s for automatic recovery"
            failures_hint = f"Provider had {self.consecutive_failures} consecutive failures"
            self.recovery_hints = [
                wait_hint,
                failures_hint,
                "Check provider logs for root cause",
                "Use registry_start() to force restart",
            ]
        super().__post_init__()
394
+
395
+
396
+ # =============================================================================
397
+ # Error Mapping Utilities
398
+ # =============================================================================
399
+
400
+
401
+ def _matches_keywords(text: str, keywords: list[str]) -> bool:
402
+ """Check if text contains any of the keywords (case-insensitive)."""
403
+ text_lower = text.lower()
404
+ return any(kw in text_lower for kw in keywords)
405
+
406
+
407
def _create_json_error(exc: Exception, provider: str, operation: str, context: dict) -> HangarError:
    """Build a ProviderProtocolError for a JSON parsing failure.

    Args:
        exc: The exception raised while parsing.
        provider: Provider ID, or an empty string if unknown.
        operation: Operation being performed.
        context: Additional context data, attached to the error as-is.

    Returns:
        A ProviderProtocolError whose ``raw_response`` holds at most the
        first 100 characters of the offending document when the exception
        exposes one (``json.JSONDecodeError.doc``), otherwise of the
        exception message.
    """
    exc_str = str(exc)
    # Prefer the actual payload over the parser's message: ``raw_response``
    # is meant to preview what the provider sent.
    document = getattr(exc, "doc", None)
    preview_source = document if isinstance(document, str) and document else exc_str
    return ProviderProtocolError(
        message=f"{provider or 'Provider'} returned invalid response",
        provider=provider,
        operation=operation,
        technical_details=f"JSON parse error: {exc_str}",
        raw_response=preview_source[:100],
        original_exception=exc,
        context=context,
    )
420
+
421
+
422
def _create_timeout_error(exc: Exception, provider: str, operation: str, context: dict) -> HangarError:
    """Build a TimeoutError for a timed-out operation.

    Args:
        exc: The exception that signalled the timeout.
        provider: Provider ID, or an empty string if unknown.
        operation: Operation being performed.
        context: Additional context data; ``context["timeout"]`` supplies
            the timeout in seconds when it is a real number.

    Returns:
        A TimeoutError. When the context carries no numeric timeout, the
        30.0 second default is reported.
    """
    default_timeout = 30.0
    timeout = context.get("timeout", default_timeout)
    # A missing or malformed value (None, str, bool, ...) must not make
    # error construction itself fail and hide the original timeout.
    if isinstance(timeout, bool) or not isinstance(timeout, (int, float)):
        timeout = default_timeout
    return TimeoutError(
        message=f"Operation timed out after {timeout}s",
        provider=provider,
        operation=operation,
        technical_details=str(exc),
        timeout_seconds=timeout,
        original_exception=exc,
        context=context,
    )
434
+
435
+
436
def _create_network_error(exc: Exception, provider: str, operation: str, context: dict) -> HangarError:
    """Wrap a network failure in a NetworkError.

    The exception text becomes the technical details; hostname and error
    code are left at their defaults.
    """
    target = provider or 'provider'
    details = str(exc)
    return NetworkError(
        message=f"Unable to reach {target}",
        provider=provider,
        operation=operation,
        technical_details=details,
        original_exception=exc,
        context=context,
    )
446
+
447
+
448
def _create_crash_error(exc: Exception, provider: str, operation: str, context: dict) -> HangarError:
    """Build a ProviderCrashError for a provider process that died.

    Args:
        exc: The exception describing the crash.
        provider: Provider ID, or an empty string if unknown.
        operation: Operation being performed.
        context: Additional context data; ``context["exit_code"]`` is
            passed through as the exit code.

    Returns:
        A ProviderCrashError. ``signal_name`` is set only when the exit
        code is a negative integer that maps to a known signal.
    """
    exit_code = context.get("exit_code")
    signal_name = None
    # A negative exit code is decoded as a signal number. The type check
    # keeps a non-integer value (e.g. a string) from raising during the
    # comparison and masking the crash being reported.
    if isinstance(exit_code, int) and not isinstance(exit_code, bool) and exit_code < 0:
        import signal as sig

        try:
            signal_name = sig.Signals(-exit_code).name
        except (ValueError, AttributeError):
            # Best effort: unknown signal number, or no Signals enum available.
            pass

    return ProviderCrashError(
        message=f"{provider or 'Provider'} terminated unexpectedly",
        provider=provider,
        operation=operation,
        technical_details=str(exc),
        exit_code=exit_code,
        signal_name=signal_name,
        original_exception=exc,
        context=context,
    )
470
+
471
+
472
def _create_rate_limit_error(exc: Exception, provider: str, operation: str, context: dict) -> HangarError:
    """Wrap a rate-limit failure in a RateLimitError.

    Limit, window and retry-after are left at their defaults.
    """
    details = str(exc)
    return RateLimitError(
        message="Too many requests",
        provider=provider,
        operation=operation,
        technical_details=details,
        original_exception=exc,
        context=context,
    )
482
+
483
+
484
def _create_provider_not_found_error(exc: Exception, provider: str, operation: str, context: dict) -> HangarError:
    """Wrap a missing-provider failure in a ProviderNotFoundError.

    No list of available providers is supplied here.
    """
    details = str(exc)
    return ProviderNotFoundError(
        message=f"Provider '{provider}' not found",
        provider=provider,
        operation=operation,
        technical_details=details,
        original_exception=exc,
        context=context,
    )
494
+
495
+
496
def _create_tool_not_found_error(exc: Exception, provider: str, operation: str, context: dict) -> HangarError:
    """Wrap a missing-tool failure in a ToolNotFoundError.

    The tool name comes from ``context["tool_name"]`` and defaults to an
    empty string.
    """
    missing_tool = context.get("tool_name", "")
    details = str(exc)
    return ToolNotFoundError(
        message=f"Tool '{missing_tool}' not found on provider '{provider}'",
        provider=provider,
        operation=operation,
        tool_name=missing_tool,
        technical_details=details,
        original_exception=exc,
        context=context,
    )
508
+
509
+
510
def _create_client_error(exc: Exception, provider: str, operation: str, context: dict) -> HangarError:
    """Wrap a client communication failure.

    Returns a TransientError when the exception text mentions "malformed"
    or "json"; otherwise a plain HangarError.
    """
    detail = str(exc)

    if not _matches_keywords(detail, ["malformed", "json"]):
        return HangarError(
            message=f"Client error: {detail}",
            provider=provider,
            operation=operation,
            technical_details=detail,
            recovery_hints=[
                "Check provider status",
                "Restart the provider if needed",
            ],
            original_exception=exc,
            context=context,
        )

    # Malformed payloads are treated as transient.
    return TransientError(
        message=f"Communication error with {provider or 'provider'}",
        provider=provider,
        operation=operation,
        technical_details=detail,
        recovery_hints=[
            "This is usually a transient error",
            "Retry the operation",
            f"Check provider status: registry_details('{provider}')",
        ],
        original_exception=exc,
        context=context,
    )
540
+
541
+
542
def _create_generic_error(exc: Exception, provider: str, operation: str, context: dict) -> HangarError:
    """Fallback: wrap any exception in a plain HangarError.

    The technical details record the exception's type name and text.
    """
    detail = str(exc)
    kind = type(exc).__name__

    # Point at the specific provider when one is known.
    if provider:
        status_hint = f"Provider status: registry_details('{provider}')"
    else:
        status_hint = "Check provider configuration"

    return HangarError(
        message=f"Operation failed: {detail}",
        provider=provider,
        operation=operation,
        technical_details=f"{kind}: {detail}",
        recovery_hints=["Check the logs for more details", status_hint],
        original_exception=exc,
        context=context,
    )
558
+
559
+
560
# Error detection rules: (keywords to match, detector function, creator function).
# Detectors receive (exception type name, exception message). Rules are tried
# top to bottom and the first detector that returns True wins, so order matters
# (e.g. a message mentioning both "json" and "timeout" maps to a JSON error).
_ERROR_MATCHERS: list[tuple[list[str], Callable, Callable]] = [
    # JSON errors - check type name and message
    (
        ["json"],
        lambda exc_type, exc_str: "JSONDecodeError" in exc_type or "json" in exc_str.lower(),
        _create_json_error,
    ),
    # Timeout errors - "timed out" wording and any *Timeout* type name
    # (TimeoutError, TimeoutExpired, ReadTimeout, ...) count as well, matching
    # the patterns is_retryable() already treats as timeouts.
    (
        ["timeout", "timed out"],
        lambda exc_type, exc_str: _matches_keywords(exc_str, ["timeout", "timed out"])
        or "timeout" in exc_type.lower(),
        _create_timeout_error,
    ),
    # Network errors
    (
        ["connection", "network", "dns", "eai_again", "econnrefused"],
        lambda exc_type, exc_str: _matches_keywords(
            exc_str, ["connection", "network", "dns", "eai_again", "econnrefused"]
        ),
        _create_network_error,
    ),
    # Crash errors
    (
        ["exit code", "sigkill", "terminated", "process died"],
        lambda exc_type, exc_str: _matches_keywords(exc_str, ["exit code", "sigkill", "terminated", "process died"]),
        _create_crash_error,
    ),
    # Rate limit
    (["rate limit"], lambda exc_type, exc_str: "rate limit" in exc_str.lower(), _create_rate_limit_error),
    # Provider not found
    (
        ["not found", "provider"],
        lambda exc_type, exc_str: "not found" in exc_str.lower() and "provider" in exc_str.lower(),
        _create_provider_not_found_error,
    ),
    # Tool not found
    (
        ["not found", "tool"],
        lambda exc_type, exc_str: "not found" in exc_str.lower() and "tool" in exc_str.lower(),
        _create_tool_not_found_error,
    ),
    # Client errors
    (
        ["client"],
        lambda exc_type, exc_str: "client" in exc_str.lower() or "ClientError" in exc_type,
        _create_client_error,
    ),
]
609
+
610
+
611
def map_exception_to_hangar_error(
    exc: Exception,
    provider: str = "",
    operation: str = "",
    context: Optional[Dict[str, Any]] = None,
) -> HangarError:
    """Translate an arbitrary exception into a HangarError.

    An exception that already is a HangarError is returned unchanged.
    Otherwise the rules in ``_ERROR_MATCHERS`` are tried in order against
    the exception's type name and message; the first matching rule builds
    the result, and a generic HangarError is built when none matches.

    Args:
        exc: The original exception.
        provider: Provider ID if known.
        operation: Operation being performed.
        context: Additional context data; treated as ``{}`` when falsy.

    Returns:
        The HangarError (or subclass) produced for ``exc``.
    """
    if isinstance(exc, HangarError):
        return exc

    details = context or {}
    message_text = str(exc)
    type_name = type(exc).__name__

    # First rule whose detector accepts the exception; generic fallback otherwise.
    build = next(
        (creator for _keywords, detect, creator in _ERROR_MATCHERS if detect(type_name, message_text)),
        _create_generic_error,
    )
    return build(exc, provider, operation, details)
648
+
649
+
650
def is_retryable(error: Exception) -> bool:
    """Decide whether retrying after ``error`` is worthwhile.

    - TransientError: its own ``retryable`` flag.
    - Other HangarError: True only for ProviderProtocolError, NetworkError
      and TimeoutError.
    - Anything else: True when the lower-cased message or type name
      contains one of the known transient markers.

    Args:
        error: The error to check.

    Returns:
        True if the error is considered transient and can be retried.
    """
    if isinstance(error, TransientError):
        return error.retryable

    if isinstance(error, HangarError):
        return isinstance(error, (ProviderProtocolError, NetworkError, TimeoutError))

    # Foreign exception: fall back to substring heuristics.
    message_text = str(error).lower()
    type_name = type(error).__name__.lower()
    markers = (
        "timeout",
        "timed out",
        "connection",
        "json",
        "malformed",
        "temporary",
        "transient",
        "retry",
        "network",
    )
    return any(marker in message_text or marker in type_name for marker in markers)
689
+
690
+
691
class ErrorClassifier:
    """Classifies errors as transient or permanent and provides recovery hints.

    This classifier analyzes error messages and types to determine:
    - Whether the error is transient (retry may help) or permanent (retry won't help)
    - Specific error reason for debugging
    - Actionable recovery hints for users

    Matching is a case-insensitive substring test against both the error
    message and the exception class name. Each pattern table is scanned in
    insertion order and the first hit wins, so a specific pattern must be
    listed BEFORE any shorter pattern it contains (e.g. "file not found"
    ahead of "not found"); otherwise the specific entry can never match.
    """

    # Transient errors - these should be retried
    # Maps pattern -> single recovery hint.
    TRANSIENT_PATTERNS = {
        "timeout": "Retry with longer timeout or wait for provider recovery",
        "timed out": "Retry with longer timeout or wait for provider recovery",
        "connection_refused": "Provider may be starting, retry in 1-2 seconds",
        "connection refused": "Provider may be starting, retry in 1-2 seconds",
        "service_unavailable": "Provider overloaded, implement backoff",
        "service unavailable": "Provider overloaded, implement backoff",
        "network_error": "Check network connectivity, retry",
        "network error": "Check network connectivity, retry",
        "econnrefused": "Provider may be starting, retry in 1-2 seconds",
        "temporary": "Retry the operation",
        "transient": "Retry the operation",
        "json": "Provider returned malformed response, retry",
        "malformed": "Provider returned malformed response, retry",
        "connection reset": "Network issue, retry",
        "broken pipe": "Network issue, retry",
    }

    # Permanent errors - these should NOT be retried
    # Maps pattern -> (reason, list of recovery hints).
    PERMANENT_PATTERNS = {
        "division by zero": ("validation_error", ["Check arguments: divisor cannot be zero"]),
        "zerodivision": ("validation_error", ["Check arguments: divisor cannot be zero"]),
        "invalid_argument": ("validation_error", ["Review tool schema and fix arguments"]),
        "invalid argument": ("validation_error", ["Review tool schema and fix arguments"]),
        "tool_not_found": ("configuration_error", ["Verify tool name exists on provider"]),
        "tool not found": ("configuration_error", ["Verify tool name exists on provider"]),
        "provider_not_found": ("configuration_error", ["Check provider ID in registry_status()"]),
        "provider not found": ("configuration_error", ["Check provider ID in registry_status()"]),
        "permission_denied": ("authorization_error", ["Verify permissions for requested resource"]),
        "permission denied": ("authorization_error", ["Verify permissions for requested resource"]),
        "access denied": ("authorization_error", ["Path outside allowed directories or resource access not permitted"]),
        "unauthorized": ("authorization_error", ["Check authentication credentials"]),
        "forbidden": ("authorization_error", ["Check authorization permissions"]),
        # Must precede the generic "not found" entry, which would otherwise
        # shadow it and report file errors as not_found_error.
        "file not found": ("resource_error", ["Verify the file path exists"]),
        "not found": ("not_found_error", ["Verify the resource or tool exists"]),
        "value error": ("validation_error", ["Check input arguments format and values"]),
        "valueerror": ("validation_error", ["Check input arguments format and values"]),
        "type error": ("validation_error", ["Check input arguments types"]),
        "typeerror": ("validation_error", ["Check input arguments types"]),
        "no such file": ("resource_error", ["Verify the file path exists"]),
        "syntax error": ("validation_error", ["Check input syntax (e.g., SQL, JSON)"]),
        "invalid syntax": ("validation_error", ["Check input syntax"]),
    }

    @classmethod
    def classify(cls, error: Exception) -> Dict[str, Any]:
        """Classify an error and return metadata.

        Resolution order: a HangarError is classified by its type alone;
        otherwise permanent patterns are tried, then transient patterns, then
        ``is_retryable()``, and finally the error is treated as unknown (and
        assumed retryable).

        Args:
            error: The exception to classify

        Returns:
            Dictionary with:
            - is_transient: bool - whether retry may help
            - final_error_reason: str - classification like "permanent: validation_error"
            - recovery_hints: List[str] - actionable steps
            - should_retry: bool - recommendation
        """
        error_message = str(error).lower()
        error_type = type(error).__name__
        # Lower-cased once; it is compared against every pattern below.
        error_type_lower = error_type.lower()

        # Check if it's already a HangarError with hints
        if isinstance(error, HangarError):
            is_transient = isinstance(error, (TransientError, ProviderProtocolError, NetworkError, TimeoutError))
            return {
                "is_transient": is_transient,
                "final_error_reason": f"{'transient' if is_transient else 'permanent'}: {error_type}",
                "recovery_hints": error.recovery_hints if error.recovery_hints else cls._default_hints(is_transient),
                "should_retry": is_transient,
            }

        # Check permanent patterns first (more specific)
        for pattern, (reason, hints) in cls.PERMANENT_PATTERNS.items():
            if pattern in error_message or pattern in error_type_lower:
                return {
                    "is_transient": False,
                    "final_error_reason": f"permanent: {reason}",
                    # Copy so callers cannot mutate the shared class-level list.
                    "recovery_hints": list(hints),
                    "should_retry": False,
                }

        # Check transient patterns
        for pattern, hint in cls.TRANSIENT_PATTERNS.items():
            if pattern in error_message or pattern in error_type_lower:
                return {
                    "is_transient": True,
                    "final_error_reason": f"transient: {pattern.replace('_', ' ')}",
                    "recovery_hints": [hint],
                    "should_retry": True,
                }

        # Use is_retryable() for additional checks
        if is_retryable(error):
            return {
                "is_transient": True,
                "final_error_reason": "transient: unclassified",
                "recovery_hints": ["Retry the operation", "Check provider status"],
                "should_retry": True,
            }

        # Unknown = conservative approach (assume may be transient for safety)
        return {
            "is_transient": True,
            "final_error_reason": "unknown: unclassified error",
            "recovery_hints": [
                "Check the error message",
                "Retry may help",
                "Contact support if persistent",
            ],
            "should_retry": True,
        }

    @classmethod
    def _default_hints(cls, is_transient: bool) -> List[str]:
        """Get default hints based on error type.

        Args:
            is_transient: Whether the error was classified as transient

        Returns:
            A fresh list of generic hints: retry-oriented for transient
            errors, review-oriented for permanent ones.
        """
        if is_transient:
            return ["Retry the operation", "Check provider logs for details"]
        return ["Review the error message", "Check input arguments", "Verify provider configuration"]