codepp 0.0.437__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288) hide show
  1. code_puppy/__init__.py +10 -0
  2. code_puppy/__main__.py +10 -0
  3. code_puppy/agents/__init__.py +31 -0
  4. code_puppy/agents/agent_c_reviewer.py +155 -0
  5. code_puppy/agents/agent_code_puppy.py +117 -0
  6. code_puppy/agents/agent_code_reviewer.py +90 -0
  7. code_puppy/agents/agent_cpp_reviewer.py +132 -0
  8. code_puppy/agents/agent_creator_agent.py +638 -0
  9. code_puppy/agents/agent_golang_reviewer.py +151 -0
  10. code_puppy/agents/agent_helios.py +124 -0
  11. code_puppy/agents/agent_javascript_reviewer.py +160 -0
  12. code_puppy/agents/agent_manager.py +742 -0
  13. code_puppy/agents/agent_pack_leader.py +385 -0
  14. code_puppy/agents/agent_planning.py +165 -0
  15. code_puppy/agents/agent_python_programmer.py +169 -0
  16. code_puppy/agents/agent_python_reviewer.py +90 -0
  17. code_puppy/agents/agent_qa_expert.py +163 -0
  18. code_puppy/agents/agent_qa_kitten.py +208 -0
  19. code_puppy/agents/agent_scheduler.py +121 -0
  20. code_puppy/agents/agent_security_auditor.py +181 -0
  21. code_puppy/agents/agent_terminal_qa.py +323 -0
  22. code_puppy/agents/agent_typescript_reviewer.py +166 -0
  23. code_puppy/agents/base_agent.py +2156 -0
  24. code_puppy/agents/event_stream_handler.py +348 -0
  25. code_puppy/agents/json_agent.py +202 -0
  26. code_puppy/agents/pack/__init__.py +34 -0
  27. code_puppy/agents/pack/bloodhound.py +304 -0
  28. code_puppy/agents/pack/husky.py +327 -0
  29. code_puppy/agents/pack/retriever.py +393 -0
  30. code_puppy/agents/pack/shepherd.py +348 -0
  31. code_puppy/agents/pack/terrier.py +287 -0
  32. code_puppy/agents/pack/watchdog.py +367 -0
  33. code_puppy/agents/prompt_reviewer.py +145 -0
  34. code_puppy/agents/subagent_stream_handler.py +276 -0
  35. code_puppy/api/__init__.py +13 -0
  36. code_puppy/api/app.py +169 -0
  37. code_puppy/api/main.py +21 -0
  38. code_puppy/api/pty_manager.py +453 -0
  39. code_puppy/api/routers/__init__.py +12 -0
  40. code_puppy/api/routers/agents.py +36 -0
  41. code_puppy/api/routers/commands.py +217 -0
  42. code_puppy/api/routers/config.py +75 -0
  43. code_puppy/api/routers/sessions.py +234 -0
  44. code_puppy/api/templates/terminal.html +361 -0
  45. code_puppy/api/websocket.py +154 -0
  46. code_puppy/callbacks.py +692 -0
  47. code_puppy/chatgpt_codex_client.py +338 -0
  48. code_puppy/claude_cache_client.py +672 -0
  49. code_puppy/cli_runner.py +1073 -0
  50. code_puppy/command_line/__init__.py +1 -0
  51. code_puppy/command_line/add_model_menu.py +1092 -0
  52. code_puppy/command_line/agent_menu.py +662 -0
  53. code_puppy/command_line/attachments.py +395 -0
  54. code_puppy/command_line/autosave_menu.py +704 -0
  55. code_puppy/command_line/clipboard.py +527 -0
  56. code_puppy/command_line/colors_menu.py +532 -0
  57. code_puppy/command_line/command_handler.py +293 -0
  58. code_puppy/command_line/command_registry.py +150 -0
  59. code_puppy/command_line/config_commands.py +719 -0
  60. code_puppy/command_line/core_commands.py +867 -0
  61. code_puppy/command_line/diff_menu.py +865 -0
  62. code_puppy/command_line/file_path_completion.py +73 -0
  63. code_puppy/command_line/load_context_completion.py +52 -0
  64. code_puppy/command_line/mcp/__init__.py +10 -0
  65. code_puppy/command_line/mcp/base.py +32 -0
  66. code_puppy/command_line/mcp/catalog_server_installer.py +175 -0
  67. code_puppy/command_line/mcp/custom_server_form.py +688 -0
  68. code_puppy/command_line/mcp/custom_server_installer.py +195 -0
  69. code_puppy/command_line/mcp/edit_command.py +148 -0
  70. code_puppy/command_line/mcp/handler.py +138 -0
  71. code_puppy/command_line/mcp/help_command.py +147 -0
  72. code_puppy/command_line/mcp/install_command.py +214 -0
  73. code_puppy/command_line/mcp/install_menu.py +705 -0
  74. code_puppy/command_line/mcp/list_command.py +94 -0
  75. code_puppy/command_line/mcp/logs_command.py +235 -0
  76. code_puppy/command_line/mcp/remove_command.py +82 -0
  77. code_puppy/command_line/mcp/restart_command.py +100 -0
  78. code_puppy/command_line/mcp/search_command.py +123 -0
  79. code_puppy/command_line/mcp/start_all_command.py +135 -0
  80. code_puppy/command_line/mcp/start_command.py +117 -0
  81. code_puppy/command_line/mcp/status_command.py +184 -0
  82. code_puppy/command_line/mcp/stop_all_command.py +112 -0
  83. code_puppy/command_line/mcp/stop_command.py +80 -0
  84. code_puppy/command_line/mcp/test_command.py +107 -0
  85. code_puppy/command_line/mcp/utils.py +129 -0
  86. code_puppy/command_line/mcp/wizard_utils.py +334 -0
  87. code_puppy/command_line/mcp_completion.py +174 -0
  88. code_puppy/command_line/model_picker_completion.py +197 -0
  89. code_puppy/command_line/model_settings_menu.py +932 -0
  90. code_puppy/command_line/motd.py +96 -0
  91. code_puppy/command_line/onboarding_slides.py +179 -0
  92. code_puppy/command_line/onboarding_wizard.py +342 -0
  93. code_puppy/command_line/pin_command_completion.py +329 -0
  94. code_puppy/command_line/prompt_toolkit_completion.py +846 -0
  95. code_puppy/command_line/session_commands.py +302 -0
  96. code_puppy/command_line/shell_passthrough.py +145 -0
  97. code_puppy/command_line/skills_completion.py +160 -0
  98. code_puppy/command_line/uc_menu.py +893 -0
  99. code_puppy/command_line/utils.py +93 -0
  100. code_puppy/command_line/wiggum_state.py +78 -0
  101. code_puppy/config.py +1770 -0
  102. code_puppy/error_logging.py +134 -0
  103. code_puppy/gemini_code_assist.py +385 -0
  104. code_puppy/gemini_model.py +754 -0
  105. code_puppy/hook_engine/README.md +105 -0
  106. code_puppy/hook_engine/__init__.py +21 -0
  107. code_puppy/hook_engine/aliases.py +155 -0
  108. code_puppy/hook_engine/engine.py +221 -0
  109. code_puppy/hook_engine/executor.py +296 -0
  110. code_puppy/hook_engine/matcher.py +156 -0
  111. code_puppy/hook_engine/models.py +240 -0
  112. code_puppy/hook_engine/registry.py +106 -0
  113. code_puppy/hook_engine/validator.py +144 -0
  114. code_puppy/http_utils.py +361 -0
  115. code_puppy/keymap.py +128 -0
  116. code_puppy/main.py +10 -0
  117. code_puppy/mcp_/__init__.py +66 -0
  118. code_puppy/mcp_/async_lifecycle.py +286 -0
  119. code_puppy/mcp_/blocking_startup.py +469 -0
  120. code_puppy/mcp_/captured_stdio_server.py +275 -0
  121. code_puppy/mcp_/circuit_breaker.py +290 -0
  122. code_puppy/mcp_/config_wizard.py +507 -0
  123. code_puppy/mcp_/dashboard.py +308 -0
  124. code_puppy/mcp_/error_isolation.py +407 -0
  125. code_puppy/mcp_/examples/retry_example.py +226 -0
  126. code_puppy/mcp_/health_monitor.py +589 -0
  127. code_puppy/mcp_/managed_server.py +428 -0
  128. code_puppy/mcp_/manager.py +807 -0
  129. code_puppy/mcp_/mcp_logs.py +224 -0
  130. code_puppy/mcp_/registry.py +451 -0
  131. code_puppy/mcp_/retry_manager.py +337 -0
  132. code_puppy/mcp_/server_registry_catalog.py +1126 -0
  133. code_puppy/mcp_/status_tracker.py +355 -0
  134. code_puppy/mcp_/system_tools.py +209 -0
  135. code_puppy/mcp_prompts/__init__.py +1 -0
  136. code_puppy/mcp_prompts/hook_creator.py +103 -0
  137. code_puppy/messaging/__init__.py +255 -0
  138. code_puppy/messaging/bus.py +613 -0
  139. code_puppy/messaging/commands.py +167 -0
  140. code_puppy/messaging/markdown_patches.py +57 -0
  141. code_puppy/messaging/message_queue.py +361 -0
  142. code_puppy/messaging/messages.py +569 -0
  143. code_puppy/messaging/queue_console.py +271 -0
  144. code_puppy/messaging/renderers.py +311 -0
  145. code_puppy/messaging/rich_renderer.py +1158 -0
  146. code_puppy/messaging/spinner/__init__.py +83 -0
  147. code_puppy/messaging/spinner/console_spinner.py +240 -0
  148. code_puppy/messaging/spinner/spinner_base.py +95 -0
  149. code_puppy/messaging/subagent_console.py +460 -0
  150. code_puppy/model_factory.py +848 -0
  151. code_puppy/model_switching.py +63 -0
  152. code_puppy/model_utils.py +168 -0
  153. code_puppy/models.json +174 -0
  154. code_puppy/models_dev_api.json +1 -0
  155. code_puppy/models_dev_parser.py +592 -0
  156. code_puppy/plugins/__init__.py +186 -0
  157. code_puppy/plugins/agent_skills/__init__.py +22 -0
  158. code_puppy/plugins/agent_skills/config.py +175 -0
  159. code_puppy/plugins/agent_skills/discovery.py +136 -0
  160. code_puppy/plugins/agent_skills/downloader.py +392 -0
  161. code_puppy/plugins/agent_skills/installer.py +22 -0
  162. code_puppy/plugins/agent_skills/metadata.py +219 -0
  163. code_puppy/plugins/agent_skills/prompt_builder.py +60 -0
  164. code_puppy/plugins/agent_skills/register_callbacks.py +241 -0
  165. code_puppy/plugins/agent_skills/remote_catalog.py +322 -0
  166. code_puppy/plugins/agent_skills/skill_catalog.py +257 -0
  167. code_puppy/plugins/agent_skills/skills_install_menu.py +664 -0
  168. code_puppy/plugins/agent_skills/skills_menu.py +781 -0
  169. code_puppy/plugins/antigravity_oauth/__init__.py +10 -0
  170. code_puppy/plugins/antigravity_oauth/accounts.py +406 -0
  171. code_puppy/plugins/antigravity_oauth/antigravity_model.py +706 -0
  172. code_puppy/plugins/antigravity_oauth/config.py +42 -0
  173. code_puppy/plugins/antigravity_oauth/constants.py +133 -0
  174. code_puppy/plugins/antigravity_oauth/oauth.py +478 -0
  175. code_puppy/plugins/antigravity_oauth/register_callbacks.py +518 -0
  176. code_puppy/plugins/antigravity_oauth/storage.py +288 -0
  177. code_puppy/plugins/antigravity_oauth/test_plugin.py +319 -0
  178. code_puppy/plugins/antigravity_oauth/token.py +167 -0
  179. code_puppy/plugins/antigravity_oauth/transport.py +863 -0
  180. code_puppy/plugins/antigravity_oauth/utils.py +168 -0
  181. code_puppy/plugins/chatgpt_oauth/__init__.py +8 -0
  182. code_puppy/plugins/chatgpt_oauth/config.py +52 -0
  183. code_puppy/plugins/chatgpt_oauth/oauth_flow.py +329 -0
  184. code_puppy/plugins/chatgpt_oauth/register_callbacks.py +176 -0
  185. code_puppy/plugins/chatgpt_oauth/test_plugin.py +301 -0
  186. code_puppy/plugins/chatgpt_oauth/utils.py +523 -0
  187. code_puppy/plugins/claude_code_hooks/__init__.py +1 -0
  188. code_puppy/plugins/claude_code_hooks/config.py +137 -0
  189. code_puppy/plugins/claude_code_hooks/register_callbacks.py +175 -0
  190. code_puppy/plugins/claude_code_oauth/README.md +167 -0
  191. code_puppy/plugins/claude_code_oauth/SETUP.md +93 -0
  192. code_puppy/plugins/claude_code_oauth/__init__.py +25 -0
  193. code_puppy/plugins/claude_code_oauth/config.py +52 -0
  194. code_puppy/plugins/claude_code_oauth/register_callbacks.py +453 -0
  195. code_puppy/plugins/claude_code_oauth/test_plugin.py +283 -0
  196. code_puppy/plugins/claude_code_oauth/token_refresh_heartbeat.py +241 -0
  197. code_puppy/plugins/claude_code_oauth/utils.py +640 -0
  198. code_puppy/plugins/customizable_commands/__init__.py +0 -0
  199. code_puppy/plugins/customizable_commands/register_callbacks.py +152 -0
  200. code_puppy/plugins/example_custom_command/README.md +280 -0
  201. code_puppy/plugins/example_custom_command/register_callbacks.py +51 -0
  202. code_puppy/plugins/file_permission_handler/__init__.py +4 -0
  203. code_puppy/plugins/file_permission_handler/register_callbacks.py +470 -0
  204. code_puppy/plugins/frontend_emitter/__init__.py +25 -0
  205. code_puppy/plugins/frontend_emitter/emitter.py +121 -0
  206. code_puppy/plugins/frontend_emitter/register_callbacks.py +261 -0
  207. code_puppy/plugins/hook_creator/__init__.py +1 -0
  208. code_puppy/plugins/hook_creator/register_callbacks.py +33 -0
  209. code_puppy/plugins/hook_manager/__init__.py +1 -0
  210. code_puppy/plugins/hook_manager/config.py +290 -0
  211. code_puppy/plugins/hook_manager/hooks_menu.py +564 -0
  212. code_puppy/plugins/hook_manager/register_callbacks.py +227 -0
  213. code_puppy/plugins/oauth_puppy_html.py +228 -0
  214. code_puppy/plugins/scheduler/__init__.py +1 -0
  215. code_puppy/plugins/scheduler/register_callbacks.py +88 -0
  216. code_puppy/plugins/scheduler/scheduler_menu.py +522 -0
  217. code_puppy/plugins/scheduler/scheduler_wizard.py +341 -0
  218. code_puppy/plugins/shell_safety/__init__.py +6 -0
  219. code_puppy/plugins/shell_safety/agent_shell_safety.py +69 -0
  220. code_puppy/plugins/shell_safety/command_cache.py +156 -0
  221. code_puppy/plugins/shell_safety/register_callbacks.py +202 -0
  222. code_puppy/plugins/synthetic_status/__init__.py +1 -0
  223. code_puppy/plugins/synthetic_status/register_callbacks.py +132 -0
  224. code_puppy/plugins/synthetic_status/status_api.py +147 -0
  225. code_puppy/plugins/universal_constructor/__init__.py +13 -0
  226. code_puppy/plugins/universal_constructor/models.py +138 -0
  227. code_puppy/plugins/universal_constructor/register_callbacks.py +47 -0
  228. code_puppy/plugins/universal_constructor/registry.py +302 -0
  229. code_puppy/plugins/universal_constructor/sandbox.py +584 -0
  230. code_puppy/prompts/antigravity_system_prompt.md +1 -0
  231. code_puppy/pydantic_patches.py +356 -0
  232. code_puppy/reopenable_async_client.py +232 -0
  233. code_puppy/round_robin_model.py +150 -0
  234. code_puppy/scheduler/__init__.py +41 -0
  235. code_puppy/scheduler/__main__.py +9 -0
  236. code_puppy/scheduler/cli.py +118 -0
  237. code_puppy/scheduler/config.py +126 -0
  238. code_puppy/scheduler/daemon.py +280 -0
  239. code_puppy/scheduler/executor.py +155 -0
  240. code_puppy/scheduler/platform.py +19 -0
  241. code_puppy/scheduler/platform_unix.py +22 -0
  242. code_puppy/scheduler/platform_win.py +32 -0
  243. code_puppy/session_storage.py +338 -0
  244. code_puppy/status_display.py +257 -0
  245. code_puppy/summarization_agent.py +176 -0
  246. code_puppy/terminal_utils.py +418 -0
  247. code_puppy/tools/__init__.py +501 -0
  248. code_puppy/tools/agent_tools.py +603 -0
  249. code_puppy/tools/ask_user_question/__init__.py +26 -0
  250. code_puppy/tools/ask_user_question/constants.py +73 -0
  251. code_puppy/tools/ask_user_question/demo_tui.py +55 -0
  252. code_puppy/tools/ask_user_question/handler.py +232 -0
  253. code_puppy/tools/ask_user_question/models.py +304 -0
  254. code_puppy/tools/ask_user_question/registration.py +26 -0
  255. code_puppy/tools/ask_user_question/renderers.py +309 -0
  256. code_puppy/tools/ask_user_question/terminal_ui.py +329 -0
  257. code_puppy/tools/ask_user_question/theme.py +155 -0
  258. code_puppy/tools/ask_user_question/tui_loop.py +423 -0
  259. code_puppy/tools/browser/__init__.py +37 -0
  260. code_puppy/tools/browser/browser_control.py +289 -0
  261. code_puppy/tools/browser/browser_interactions.py +545 -0
  262. code_puppy/tools/browser/browser_locators.py +640 -0
  263. code_puppy/tools/browser/browser_manager.py +378 -0
  264. code_puppy/tools/browser/browser_navigation.py +251 -0
  265. code_puppy/tools/browser/browser_screenshot.py +179 -0
  266. code_puppy/tools/browser/browser_scripts.py +462 -0
  267. code_puppy/tools/browser/browser_workflows.py +221 -0
  268. code_puppy/tools/browser/chromium_terminal_manager.py +259 -0
  269. code_puppy/tools/browser/terminal_command_tools.py +534 -0
  270. code_puppy/tools/browser/terminal_screenshot_tools.py +552 -0
  271. code_puppy/tools/browser/terminal_tools.py +525 -0
  272. code_puppy/tools/command_runner.py +1346 -0
  273. code_puppy/tools/common.py +1409 -0
  274. code_puppy/tools/display.py +84 -0
  275. code_puppy/tools/file_modifications.py +886 -0
  276. code_puppy/tools/file_operations.py +802 -0
  277. code_puppy/tools/scheduler_tools.py +412 -0
  278. code_puppy/tools/skills_tools.py +244 -0
  279. code_puppy/tools/subagent_context.py +158 -0
  280. code_puppy/tools/tools_content.py +51 -0
  281. code_puppy/tools/universal_constructor.py +889 -0
  282. code_puppy/uvx_detection.py +242 -0
  283. code_puppy/version_checker.py +82 -0
  284. codepp-0.0.437.dist-info/METADATA +766 -0
  285. codepp-0.0.437.dist-info/RECORD +288 -0
  286. codepp-0.0.437.dist-info/WHEEL +4 -0
  287. codepp-0.0.437.dist-info/entry_points.txt +3 -0
  288. codepp-0.0.437.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,589 @@
1
+ """
2
+ Health monitoring system for MCP servers.
3
+
4
+ This module provides continuous health monitoring for MCP servers with
5
+ automatic recovery actions when consecutive failures are detected.
6
+ """
7
+
8
+ import asyncio
9
+ import logging
10
+ import time
11
+ from collections import defaultdict, deque
12
+ from dataclasses import dataclass
13
+ from datetime import datetime
14
+ from typing import Callable, Dict, List, Optional
15
+
16
+ import httpx
17
+
18
+ from .managed_server import ManagedMCPServer
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
@dataclass
class HealthStatus:
    """
    Snapshot of a single health check recorded for an MCP server.

    Attributes:
        timestamp: Time attached to the check result
        is_healthy: True when the check passed
        latency_ms: Measured latency in milliseconds, or None when unavailable
        error: Failure description, or None when there is nothing to report
        check_type: Label describing the kind of check that produced the status
    """

    timestamp: datetime
    is_healthy: bool
    latency_ms: Optional[float]
    error: Optional[str]
    # "ping", "list_tools", "get_request", etc.
    check_type: str
32
+
33
+
34
@dataclass
class HealthCheckResult:
    """
    Outcome returned by an individual health check function.

    Attributes:
        success: True when the check passed
        latency_ms: Latency in milliseconds; the built-in checks leave this at
            0.0 and let the caller fill in the measured value
        error: Failure description, or None on success
    """

    success: bool
    latency_ms: float
    error: Optional[str]
41
+
42
+
43
class HealthMonitor:
    """
    Continuous health monitoring system for MCP servers.

    Features:
    - Background monitoring tasks using asyncio
    - Server type-specific health checks
    - Health history tracking with configurable limit
    - Custom health check registration
    - Automatic recovery triggering on consecutive failures
    - Configurable check intervals

    Example usage (from inside a coroutine; the query methods are
    coroutines as well and must be awaited):
        monitor = HealthMonitor(check_interval=30)
        await monitor.start_monitoring("server-1", managed_server)

        # Check current health
        is_healthy = await monitor.is_healthy("server-1")

        # Get health history
        history = await monitor.get_health_history("server-1", limit=50)
    """
65
+
66
def __init__(self, check_interval: int = 30):
    """
    Create an idle monitor with empty tracking state and the built-in checks.

    Args:
        check_interval: Number of seconds to wait between health checks
    """

    def _bounded_history() -> deque:
        # Each server keeps at most its 1000 most recent health records.
        return deque(maxlen=1000)

    self.check_interval = check_interval
    # server_id -> background task running the monitoring loop
    self.monitoring_tasks: Dict[str, asyncio.Task] = {}
    # server_id -> bounded history of recorded statuses
    self.health_history: Dict[str, deque] = defaultdict(_bounded_history)
    # server type -> health check callable
    self.custom_health_checks: Dict[str, Callable] = {}
    # server_id -> number of failed checks in a row
    self.consecutive_failures: Dict[str, int] = defaultdict(int)
    self.last_check_time: Dict[str, datetime] = {}
    self._lock = asyncio.Lock()

    # Install the built-in per-type checks before anything can be monitored
    self._register_default_health_checks()

    logger.info(f"Health monitor initialized with {check_interval}s check interval")
85
+
86
def _register_default_health_checks(self) -> None:
    """Install the built-in health checks for the sse, http and stdio server types."""
    builtin_checks = (
        ("sse", self._check_sse_health),
        ("http", self._check_http_health),
        ("stdio", self._check_stdio_health),
    )
    for type_name, checker in builtin_checks:
        self.register_health_check(type_name, checker)
91
+
92
async def start_monitoring(self, server_id: str, server: ManagedMCPServer) -> None:
    """
    Begin background monitoring of a server and record a first check at once.

    Calling this for a server that is already monitored only logs a warning.

    Args:
        server_id: Unique identifier for the server
        server: The managed MCP server instance to monitor
    """
    already_monitored = server_id in self.monitoring_tasks
    if already_monitored:
        logger.warning(f"Server {server_id} is already being monitored")
        return

    logger.info(f"Starting health monitoring for server {server_id}")

    # Spawn the periodic loop and remember its task so it can be cancelled later
    loop_coro = self._monitoring_loop(server_id, server)
    self.monitoring_tasks[server_id] = asyncio.create_task(
        loop_coro, name=f"health_monitor_{server_id}"
    )

    # The loop sleeps before its first check, so run one check immediately
    try:
        first_status = await self.check_health(server)
        await self._record_health_status(server_id, first_status)
    except Exception as exc:
        logger.error(f"Initial health check failed for {server_id}: {exc}")
        await self._record_health_status(
            server_id,
            HealthStatus(
                timestamp=datetime.now(),
                is_healthy=False,
                latency_ms=None,
                error=str(exc),
                check_type="initial",
            ),
        )
126
+
127
async def stop_monitoring(self, server_id: str) -> None:
    """
    Cancel the monitoring task of a server and drop its failure tracking.

    A warning is logged when the server has no monitoring task.

    Args:
        server_id: Unique identifier for the server
    """
    monitor_task = self.monitoring_tasks.pop(server_id, None)
    if not monitor_task:
        logger.warning(f"No monitoring task found for server {server_id}")
        return

    logger.info(f"Stopping health monitoring for server {server_id}")
    monitor_task.cancel()
    try:
        # Wait until the loop has actually finished unwinding
        await monitor_task
    except asyncio.CancelledError:
        pass

    # Forget the per-server counters; recorded history is left untouched
    async with self._lock:
        for tracker in (self.consecutive_failures, self.last_check_time):
            tracker.pop(server_id, None)
149
+
150
async def check_health(self, server: ManagedMCPServer) -> HealthStatus:
    """
    Run the registered health check for a server and wrap it in a HealthStatus.

    Failures (including a missing check for the server type) are reported
    through the returned status rather than raised.

    Args:
        server: The managed MCP server to check

    Returns:
        HealthStatus object with check results
    """
    type_key = server.config.type.lower()

    if not self.custom_health_checks.get(type_key):
        logger.warning(
            f"No health check function registered for server type: {type_key}"
        )
        return HealthStatus(
            timestamp=datetime.now(),
            is_healthy=False,
            latency_ms=None,
            error=f"No health check registered for type '{type_key}'",
            check_type="unknown",
        )

    try:
        outcome = await self.perform_health_check(server)
        healthy, latency, problem = outcome.success, outcome.latency_ms, outcome.error
    except Exception as exc:
        logger.error(f"Health check failed for server {server.config.id}: {exc}")
        healthy, latency, problem = False, None, str(exc)

    return HealthStatus(
        timestamp=datetime.now(),
        is_healthy=healthy,
        latency_ms=latency,
        error=problem,
        check_type=type_key,
    )
193
+
194
async def perform_health_check(self, server: ManagedMCPServer) -> HealthCheckResult:
    """
    Perform the actual health check based on server type.

    The registered check for the server's (lower-cased) type is awaited and
    timed. A bool result is wrapped in a HealthCheckResult; a
    HealthCheckResult whose latency is still 0.0 gets the measured latency
    filled in; any other result type is reported as a failure. Exceptions
    raised by the check are converted into a failed result.

    Args:
        server: The managed MCP server to check

    Returns:
        HealthCheckResult with timing and success information
    """
    server_type = server.config.type.lower()
    check_func = self.custom_health_checks.get(server_type)

    if not check_func:
        return HealthCheckResult(
            success=False,
            latency_ms=0.0,
            error=f"No health check function for type '{server_type}'",
        )

    # perf_counter() is monotonic, so the measured latency cannot be skewed
    # (or go negative) when the wall clock is adjusted during the check.
    started = time.perf_counter()

    def elapsed_ms() -> float:
        return (time.perf_counter() - started) * 1000

    try:
        result = await check_func(server)
        latency_ms = elapsed_ms()

        if isinstance(result, bool):
            return HealthCheckResult(
                success=result,
                latency_ms=latency_ms,
                error=None if result else "Health check returned False",
            )
        elif isinstance(result, HealthCheckResult):
            # 0.0 is the "not measured" placeholder used by the built-in checks
            if result.latency_ms == 0.0:
                result.latency_ms = latency_ms
            return result
        else:
            return HealthCheckResult(
                success=False,
                latency_ms=latency_ms,
                error=f"Invalid health check result type: {type(result)}",
            )

    except Exception as e:
        return HealthCheckResult(success=False, latency_ms=elapsed_ms(), error=str(e))
240
+
241
def register_health_check(self, server_type: str, check_func: Callable) -> None:
    """
    Associate a health check function with a server type.

    The type is stored lower-cased; registering the same type again replaces
    the previous function.

    Args:
        server_type: The server type ("sse", "http", "stdio")
        check_func: Async function that takes a ManagedMCPServer and returns
            bool or HealthCheckResult
    """
    type_key = server_type.lower()
    self.custom_health_checks[type_key] = check_func
    logger.info(f"Registered health check for server type: {server_type}")
252
+
253
async def get_health_history(
    self, server_id: str, limit: int = 100
) -> List[HealthStatus]:
    """
    Return the recorded health statuses of a server, newest first.

    Args:
        server_id: Unique identifier for the server
        limit: Maximum number of history entries to return; a value of zero
            or less returns the whole recorded history

    Returns:
        List of HealthStatus objects, most recent first
    """
    async with self._lock:
        # .get() avoids creating an empty history entry for unknown servers
        recorded = list(self.health_history.get(server_id, deque()))
        if limit > 0:
            # Keep only the newest `limit` entries (the tail of the history)
            recorded = recorded[-limit:]
        return recorded[::-1]
273
+
274
async def is_healthy(self, server_id: str) -> bool:
    """
    Report the health flag of the most recently recorded status for a server.

    Args:
        server_id: Unique identifier for the server

    Returns:
        True if server is healthy, False otherwise (including when no status
        has been recorded for the server)
    """
    async with self._lock:
        entries = self.health_history.get(server_id)
        # The newest status sits at the right-hand end of the history
        return entries[-1].is_healthy if entries else False
292
+
293
async def _monitoring_loop(self, server_id: str, server: ManagedMCPServer) -> None:
    """
    Background loop that periodically checks one server until cancelled.

    Each round sleeps for the check interval, skips servers that are not
    enabled, records the check result, maintains the consecutive-failure
    counter and hands failures over to the failure handler.

    Args:
        server_id: Unique identifier for the server
        server: The managed MCP server to monitor
    """
    logger.info(f"Starting monitoring loop for server {server_id}")

    while True:
        try:
            # Sleep first: the initial check is done by start_monitoring
            await asyncio.sleep(self.check_interval)

            if not server.is_enabled():
                continue

            status = await self.check_health(server)
            await self._record_health_status(server_id, status)
            check_passed = status.is_healthy

            # Update the failure streak under the lock
            async with self._lock:
                if check_passed:
                    if self.consecutive_failures[server_id] > 0:
                        logger.info(
                            f"Server {server_id} recovered after health check success"
                        )
                    self.consecutive_failures[server_id] = 0
                else:
                    self.consecutive_failures[server_id] += 1
                    logger.warning(
                        f"Health check failed for {server_id}: {status.error} "
                        f"(consecutive failures: {self.consecutive_failures[server_id]})"
                    )

            # The handler takes the lock itself, so call it after releasing it
            if not check_passed:
                await self._handle_consecutive_failures(server_id, server)

            self.last_check_time[server_id] = datetime.now()

        except asyncio.CancelledError:
            logger.info(f"Monitoring loop cancelled for server {server_id}")
            break
        except Exception as exc:
            logger.error(f"Error in monitoring loop for {server_id}: {exc}")
            # Keep the loop alive; pause briefly before the next round
            await asyncio.sleep(5)
345
+
346
async def _record_health_status(self, server_id: str, status: HealthStatus) -> None:
    """
    Record a health status in the history and log the outcome.

    Passing checks are logged at debug level, failing ones at warning level.

    Args:
        server_id: Unique identifier for the server
        status: The health status to record
    """
    async with self._lock:
        self.health_history[server_id].append(status)

    if not status.is_healthy:
        logger.warning(f"Server {server_id} health check failed: {status.error}")
        return

    # latency_ms is Optional[float]; applying the ".1f" format to None would
    # raise TypeError after the status was already stored, so only format a
    # latency that is actually present.
    if status.latency_ms is None:
        logger.debug(f"Server {server_id} health check passed")
    else:
        logger.debug(
            f"Server {server_id} health check passed ({status.latency_ms:.1f}ms)"
        )
364
+
365
async def _handle_consecutive_failures(
    self, server_id: str, server: ManagedMCPServer
) -> None:
    """
    React to the current failure streak of a server.

    From 3 consecutive failures on a recovery attempt is made; from 5 on the
    server is additionally quarantined for 30 seconds, doubling with every
    further failure up to a cap of 1800 seconds. Errors raised by either
    action are logged and not propagated.

    Args:
        server_id: Unique identifier for the server
        server: The managed MCP server
    """
    async with self._lock:
        failures = self.consecutive_failures[server_id]

    if failures < 3:
        return

    logger.error(
        f"Server {server_id} has {failures} consecutive failures, triggering recovery"
    )
    try:
        await self._trigger_recovery(server_id, server, failures)
    except Exception as exc:
        logger.error(f"Recovery failed for server {server_id}: {exc}")

    if failures < 5:
        return

    logger.critical(
        f"Quarantining server {server_id} after {failures} consecutive failures"
    )
    try:
        # Exponential backoff, capped at 30 minutes
        backoff_seconds = min(30 * (2 ** (failures - 5)), 1800)
        server.quarantine(backoff_seconds)
    except Exception as exc:
        logger.error(f"Failed to quarantine server {server_id}: {exc}")
403
+
404
async def _trigger_recovery(
    self, server_id: str, server: ManagedMCPServer, failure_count: int
) -> None:
    """
    Attempt to recover a failing server by disabling and re-enabling it.

    Args:
        server_id: Unique identifier for the server
        server: The managed MCP server
        failure_count: Number of consecutive failures

    Raises:
        Exception: Whatever the disable/enable cycle raised, after logging it
    """
    logger.info(
        f"Triggering recovery for server {server_id} (failure count: {failure_count})"
    )

    try:
        # Current recovery strategy: switch the server off, pause for one
        # second, then switch it back on.
        server.disable()
        await asyncio.sleep(1)
        server.enable()

        logger.info(f"Recovery attempt completed for server {server_id}")
    except Exception as exc:
        logger.error(f"Recovery action failed for server {server_id}: {exc}")
        raise
431
+
432
async def _check_sse_health(self, server: ManagedMCPServer) -> HealthCheckResult:
    """
    Probe an SSE server with an HTTP GET request.

    The "/health" endpoint of the configured URL is requested first; if that
    answers 404 the configured URL itself is requested. A final status code
    in the range 200-399 counts as healthy. Any exception is turned into a
    failed result.

    Args:
        server: The managed MCP server to check

    Returns:
        HealthCheckResult with check results (latency left at 0.0)
    """
    try:
        url = server.config.config.get("url")
        if not url:
            return HealthCheckResult(
                success=False,
                latency_ms=0.0,
                error="No URL configured for SSE server",
            )

        # Do not append "/health" twice when the URL already points at it
        if url.endswith("/health"):
            health_url = url
        else:
            health_url = f"{url.rstrip('/')}/health"

        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(health_url)
            if response.status_code == 404:
                # No health endpoint: fall back to the configured URL
                response = await client.get(url)

            if 200 <= response.status_code < 400:
                healthy, problem = True, None
            else:
                healthy = False
                problem = f"HTTP {response.status_code}: {response.reason_phrase}"

            # latency stays 0.0 here; perform_health_check fills it in
            return HealthCheckResult(success=healthy, latency_ms=0.0, error=problem)

    except Exception as exc:
        return HealthCheckResult(success=False, latency_ms=0.0, error=str(exc))
479
+
480
async def _check_http_health(self, server: ManagedMCPServer) -> HealthCheckResult:
    """
    Probe an HTTP server; delegates to the SSE GET-based check.

    Args:
        server: The managed MCP server to check

    Returns:
        HealthCheckResult with check results
    """
    # Same GET probe as for SSE servers
    outcome = await self._check_sse_health(server)
    return outcome
492
+
493
async def _check_stdio_health(self, server: ManagedMCPServer) -> HealthCheckResult:
    """
    Basic configuration check for stdio servers.

    No message is sent to the server: the check only obtains the pydantic
    server instance and verifies that a command is configured and can be
    found on PATH.

    Args:
        server: The managed MCP server to check

    Returns:
        HealthCheckResult with check results (latency left at 0.0)
    """
    try:
        # Called for its side effect only; a failure here fails the check
        server.get_pydantic_server()

        try:
            command = server.config.config.get("command")

            import shutil

            if not command:
                problem = "No command configured for stdio server"
            elif not shutil.which(command):
                problem = f"Command '{command}' not found in PATH"
            else:
                problem = None

            return HealthCheckResult(
                success=problem is None, latency_ms=0.0, error=problem
            )

        except Exception as exc:
            return HealthCheckResult(
                success=False,
                latency_ms=0.0,
                error=f"Server communication failed: {str(exc)}",
            )

    except Exception as exc:
        return HealthCheckResult(success=False, latency_ms=0.0, error=str(exc))
548
+
549
async def close(self) -> None:
    """Stop every monitoring task by delegating to shutdown()."""
    shutdown_coro = self.shutdown()
    await shutdown_coro
552
+
553
async def __aenter__(self) -> "HealthMonitor":
    """Support ``async with``: the monitor itself is the context value."""
    return self
556
+
557
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
    """Leave ``async with``: close the monitor; exceptions are not suppressed."""
    close_coro = self.close()
    await close_coro
560
+
561
def __del__(self) -> None:
    """Warn if there are still running monitoring tasks on garbage collection."""
    # __del__ also runs for instances whose __init__ raised before
    # monitoring_tasks was assigned, so the attribute may be missing.
    tasks = getattr(self, "monitoring_tasks", None)
    if tasks:
        logger.warning(
            f"HealthMonitor garbage collected with {len(tasks)} "
            f"active monitoring tasks. Use 'async with' or call close() to "
            f"prevent orphaned tasks."
        )
569
+
570
async def shutdown(self) -> None:
    """
    Cancel every monitoring task, wait for them to end and reset tracking.

    Recorded health history is kept; the task registry, failure counters
    and last-check times are cleared.
    """
    logger.info("Shutting down health monitor")

    pending = list(self.monitoring_tasks.values())
    for monitor_task in pending:
        monitor_task.cancel()

    if pending:
        # return_exceptions=True collects the tasks' cancellation results
        # instead of raising them here
        await asyncio.gather(*pending, return_exceptions=True)

    for registry in (
        self.monitoring_tasks,
        self.consecutive_failures,
        self.last_check_time,
    ):
        registry.clear()

    logger.info("Health monitor shutdown complete")