@agentunion/kite 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. package/CHANGELOG.md +102 -0
  2. package/cli.js +44 -5
  3. package/core/dependency_checker.py +250 -0
  4. package/core/env_checker.py +490 -0
  5. package/dependencies_lock.json +128 -0
  6. package/extensions/agents/assistant/server.py +33 -17
  7. package/extensions/channels/acp_channel/server.py +33 -17
  8. package/extensions/services/backup/entry.py +23 -16
  9. package/extensions/services/evol/auth_manager.py +443 -0
  10. package/extensions/services/evol/config.yaml +149 -0
  11. package/extensions/services/evol/config_loader.py +117 -0
  12. package/extensions/services/evol/entry.py +406 -0
  13. package/extensions/services/evol/evol_api.py +173 -0
  14. package/extensions/services/evol/evol_config.json5 +29 -0
  15. package/extensions/services/evol/migrate_tokens.py +122 -0
  16. package/extensions/services/evol/module.md +32 -0
  17. package/extensions/services/evol/pairing.py +250 -0
  18. package/extensions/services/evol/pairing_codes.jsonl +1 -0
  19. package/extensions/services/evol/relay.py +682 -0
  20. package/extensions/services/evol/relay_config.json5 +67 -0
  21. package/extensions/services/evol/routes/__init__.py +1 -0
  22. package/extensions/services/evol/routes/routes_management_ws.py +127 -0
  23. package/extensions/services/evol/routes/routes_rpc.py +89 -0
  24. package/extensions/services/evol/routes/routes_test.py +61 -0
  25. package/extensions/services/evol/server.py +875 -0
  26. package/extensions/services/evol/static/css/style.css +1200 -0
  27. package/extensions/services/evol/static/index.html +781 -0
  28. package/extensions/services/evol/static/index_evol.html +14 -0
  29. package/extensions/services/evol/static/js/app.js +6304 -0
  30. package/extensions/services/evol/static/js/auth.js +326 -0
  31. package/extensions/services/evol/static/js/dialog.js +285 -0
  32. package/extensions/services/evol/static/js/evol-app-fixed.js +50 -0
  33. package/extensions/services/evol/static/js/evol-app.js +1949 -0
  34. package/extensions/services/evol/static/js/evol-app.js.bak +1800 -0
  35. package/extensions/services/evol/static/js/kernel-client-example.js +228 -0
  36. package/extensions/services/evol/static/js/kernel-client.js +396 -0
  37. package/extensions/services/evol/static/js/main.js +141 -0
  38. package/extensions/services/evol/static/js/registry-tests.js +585 -0
  39. package/extensions/services/evol/static/js/stats.js +217 -0
  40. package/extensions/services/evol/static/js/token-manager.js +175 -0
  41. package/extensions/services/evol/static/pairing.html +248 -0
  42. package/extensions/services/evol/static/test_registry.html +262 -0
  43. package/extensions/services/evol/static/test_relay.html +462 -0
  44. package/extensions/services/evol/stats_manager.py +240 -0
  45. package/extensions/services/model_service/entry.py +23 -1
  46. package/extensions/services/proxy/.claude/settings.local.json +13 -0
  47. package/extensions/services/proxy/CHANGELOG_20260308.md +258 -0
  48. package/extensions/services/proxy/_fix_prints.py +133 -0
  49. package/extensions/services/proxy/_fix_prints2.py +87 -0
  50. package/extensions/services/proxy/agentcp/LICENCE +178 -0
  51. package/extensions/services/proxy/agentcp/README copy.md +85 -0
  52. package/extensions/services/proxy/agentcp/README.md +260 -0
  53. package/extensions/services/proxy/agentcp/__init__.py +16 -0
  54. package/extensions/services/proxy/agentcp/agent.py +4 -0
  55. package/extensions/services/proxy/agentcp/agentcp.py +2494 -0
  56. package/extensions/services/proxy/agentcp/agentprofile.json +89 -0
  57. package/extensions/services/proxy/agentcp/ap/__init__.py +16 -0
  58. package/extensions/services/proxy/agentcp/ap/ap_client.py +316 -0
  59. package/extensions/services/proxy/agentcp/assets/images/wechat_qr.png +0 -0
  60. package/extensions/services/proxy/agentcp/backup/metrics.json +31 -0
  61. package/extensions/services/proxy/agentcp/base/__init__.py +20 -0
  62. package/extensions/services/proxy/agentcp/base/auth_client.py +257 -0
  63. package/extensions/services/proxy/agentcp/base/client.py +112 -0
  64. package/extensions/services/proxy/agentcp/base/env.py +34 -0
  65. package/extensions/services/proxy/agentcp/base/html_util.py +336 -0
  66. package/extensions/services/proxy/agentcp/base/log.py +98 -0
  67. package/extensions/services/proxy/agentcp/ca/__init__.py +17 -0
  68. package/extensions/services/proxy/agentcp/ca/ca_client.py +414 -0
  69. package/extensions/services/proxy/agentcp/ca/ca_root.py +74 -0
  70. package/extensions/services/proxy/agentcp/context/__init__.py +20 -0
  71. package/extensions/services/proxy/agentcp/context/context.py +73 -0
  72. package/extensions/services/proxy/agentcp/context/exceptions.py +114 -0
  73. package/extensions/services/proxy/agentcp/create_profile.py +125 -0
  74. package/extensions/services/proxy/agentcp/create_profile_weather.py +125 -0
  75. package/extensions/services/proxy/agentcp/db/__init__.py +15 -0
  76. package/extensions/services/proxy/agentcp/db/db_mananger.py +550 -0
  77. package/extensions/services/proxy/agentcp/docs/UDP_HEARTBEAT_FIX_REPORT.md +265 -0
  78. package/extensions/services/proxy/agentcp/docs/heartbeat_issue_analysis.md +291 -0
  79. package/extensions/services/proxy/agentcp/file/__init__.py +16 -0
  80. package/extensions/services/proxy/agentcp/file/file_client.py +141 -0
  81. package/extensions/services/proxy/agentcp/file/wss_binary_message.py +137 -0
  82. package/extensions/services/proxy/agentcp/hcp.py +299 -0
  83. package/extensions/services/proxy/agentcp/heartbeat/__init__.py +16 -0
  84. package/extensions/services/proxy/agentcp/heartbeat/heartbeat_client.py +360 -0
  85. package/extensions/services/proxy/agentcp/improved_scheduler.py +498 -0
  86. package/extensions/services/proxy/agentcp/llm_agent_utils.py +249 -0
  87. package/extensions/services/proxy/agentcp/llm_server.py +172 -0
  88. package/extensions/services/proxy/agentcp/mermaid.py +210 -0
  89. package/extensions/services/proxy/agentcp/message.py +149 -0
  90. package/extensions/services/proxy/agentcp/metrics.py +256 -0
  91. package/extensions/services/proxy/agentcp/monitoring/__init__.py +20 -0
  92. package/extensions/services/proxy/agentcp/monitoring/global_monitor.py +27 -0
  93. package/extensions/services/proxy/agentcp/monitoring/metrics_store.py +325 -0
  94. package/extensions/services/proxy/agentcp/monitoring/monitoring_service.py +269 -0
  95. package/extensions/services/proxy/agentcp/monitoring/sliding_window.py +222 -0
  96. package/extensions/services/proxy/agentcp/monitoring/standalone_reader.py +224 -0
  97. package/extensions/services/proxy/agentcp/msg/__init__.py +21 -0
  98. package/extensions/services/proxy/agentcp/msg/connection_manager.py +456 -0
  99. package/extensions/services/proxy/agentcp/msg/message_client.py +2058 -0
  100. package/extensions/services/proxy/agentcp/msg/message_serialize.py +263 -0
  101. package/extensions/services/proxy/agentcp/msg/open_ai_message.py +88 -0
  102. package/extensions/services/proxy/agentcp/msg/session_manager.py +1062 -0
  103. package/extensions/services/proxy/agentcp/msg/stream_client.py +267 -0
  104. package/extensions/services/proxy/agentcp/msg/websocket_file_receiver.py +89 -0
  105. package/extensions/services/proxy/agentcp/msg/ws_logger.py +685 -0
  106. package/extensions/services/proxy/agentcp/msg/wss_binary_message.py +137 -0
  107. package/extensions/services/proxy/agentcp/requirements.txt +7 -0
  108. package/extensions/services/proxy/agentcp/samples/agent_graph/README.md +37 -0
  109. package/extensions/services/proxy/agentcp/samples/agent_graph/agentprofile.json +89 -0
  110. package/extensions/services/proxy/agentcp/samples/agent_graph/create_profile.py +138 -0
  111. package/extensions/services/proxy/agentcp/samples/agent_graph/main.py +164 -0
  112. package/extensions/services/proxy/agentcp/samples/agent_use/create_profile.py +123 -0
  113. package/extensions/services/proxy/agentcp/samples/agent_use/llm/create_profile.py +129 -0
  114. package/extensions/services/proxy/agentcp/samples/agent_use/llm/env.json +5 -0
  115. package/extensions/services/proxy/agentcp/samples/agent_use/llm/main.py +146 -0
  116. package/extensions/services/proxy/agentcp/samples/agent_use/main.py +123 -0
  117. package/extensions/services/proxy/agentcp/samples/agent_use/readme.md +379 -0
  118. package/extensions/services/proxy/agentcp/samples/agent_use/search/create_profile.py +129 -0
  119. package/extensions/services/proxy/agentcp/samples/agent_use/search/main.py +28 -0
  120. package/extensions/services/proxy/agentcp/samples/agent_use/tool/create_profile.py +129 -0
  121. package/extensions/services/proxy/agentcp/samples/agent_use/tool/main.py +20 -0
  122. package/extensions/services/proxy/agentcp/samples/ali_amap/README.md +97 -0
  123. package/extensions/services/proxy/agentcp/samples/ali_amap/amap_agent.py +88 -0
  124. package/extensions/services/proxy/agentcp/samples/ali_amap/create_profile.py +125 -0
  125. package/extensions/services/proxy/agentcp/samples/compute_agent/agent/powershell.py +228 -0
  126. package/extensions/services/proxy/agentcp/samples/compute_agent/agent/software.py +63 -0
  127. package/extensions/services/proxy/agentcp/samples/compute_agent/agent/tools.py +36 -0
  128. package/extensions/services/proxy/agentcp/samples/compute_agent/browser_user.py +41 -0
  129. package/extensions/services/proxy/agentcp/samples/deepseek/README.md +79 -0
  130. package/extensions/services/proxy/agentcp/samples/deepseek/create_profile.py +126 -0
  131. package/extensions/services/proxy/agentcp/samples/deepseek/deepseek.py +42 -0
  132. package/extensions/services/proxy/agentcp/samples/dify_chat/README.md +78 -0
  133. package/extensions/services/proxy/agentcp/samples/dify_chat/create_profile.py +126 -0
  134. package/extensions/services/proxy/agentcp/samples/dify_chat/dify_chat.py +47 -0
  135. package/extensions/services/proxy/agentcp/samples/dify_workflow/README.md +78 -0
  136. package/extensions/services/proxy/agentcp/samples/dify_workflow/create_profile.py +126 -0
  137. package/extensions/services/proxy/agentcp/samples/dify_workflow/dify_workflow.py +46 -0
  138. package/extensions/services/proxy/agentcp/samples/executor/README.md +44 -0
  139. package/extensions/services/proxy/agentcp/samples/executor/agentprofile.json +89 -0
  140. package/extensions/services/proxy/agentcp/samples/executor/create_profile.py +139 -0
  141. package/extensions/services/proxy/agentcp/samples/executor/main.py +160 -0
  142. package/extensions/services/proxy/agentcp/samples/filereader/README.md +45 -0
  143. package/extensions/services/proxy/agentcp/samples/filereader/agentprofile.json +90 -0
  144. package/extensions/services/proxy/agentcp/samples/filereader/create_profile.py +137 -0
  145. package/extensions/services/proxy/agentcp/samples/filereader/main.py +253 -0
  146. package/extensions/services/proxy/agentcp/samples/filewriter/README.md +38 -0
  147. package/extensions/services/proxy/agentcp/samples/filewriter/agentprofile.json +91 -0
  148. package/extensions/services/proxy/agentcp/samples/filewriter/create_profile.py +138 -0
  149. package/extensions/services/proxy/agentcp/samples/filewriter/main.py +289 -0
  150. package/extensions/services/proxy/agentcp/samples/hcp/README.md +85 -0
  151. package/extensions/services/proxy/agentcp/samples/hcp/acp_weather_agent.zip +0 -0
  152. package/extensions/services/proxy/agentcp/samples/hcp/create_profile.py +125 -0
  153. package/extensions/services/proxy/agentcp/samples/hcp/hcp.py +237 -0
  154. package/extensions/services/proxy/agentcp/samples/helloworld/README.md +68 -0
  155. package/extensions/services/proxy/agentcp/samples/helloworld/hello_world.py +40 -0
  156. package/extensions/services/proxy/agentcp/samples/llm_agent/MEADME.md +117 -0
  157. package/extensions/services/proxy/agentcp/samples/llm_agent/create_profile.py +125 -0
  158. package/extensions/services/proxy/agentcp/samples/llm_agent/qwen_agent.py +136 -0
  159. package/extensions/services/proxy/agentcp/samples/local_llm_agent/README.md +90 -0
  160. package/extensions/services/proxy/agentcp/samples/local_llm_agent/create_profile.py +125 -0
  161. package/extensions/services/proxy/agentcp/samples/local_llm_agent/main.py +49 -0
  162. package/extensions/services/proxy/agentcp/samples/query_llm_from_agent/README.md +55 -0
  163. package/extensions/services/proxy/agentcp/samples/query_llm_from_agent/create_profile.py +125 -0
  164. package/extensions/services/proxy/agentcp/samples/query_llm_from_agent/main.py +23 -0
  165. package/extensions/services/proxy/agentcp/samples/query_weather_api_agent/README.md +103 -0
  166. package/extensions/services/proxy/agentcp/samples/query_weather_api_agent/create_profile.py +125 -0
  167. package/extensions/services/proxy/agentcp/samples/query_weather_api_agent/main.py +69 -0
  168. package/extensions/services/proxy/agentcp/samples/query_weather_from_agent/README.md +58 -0
  169. package/extensions/services/proxy/agentcp/samples/query_weather_from_agent/create_profile.py +125 -0
  170. package/extensions/services/proxy/agentcp/samples/query_weather_from_agent/main.py +25 -0
  171. package/extensions/services/proxy/agentcp/samples/qwen3/README.md +71 -0
  172. package/extensions/services/proxy/agentcp/samples/qwen3/create_profile.py +126 -0
  173. package/extensions/services/proxy/agentcp/samples/qwen3/qwen3.py +37 -0
  174. package/extensions/services/proxy/agentcp/samples/qwen3_tools/README.md +133 -0
  175. package/extensions/services/proxy/agentcp/samples/qwen3_tools/create_profile.py +126 -0
  176. package/extensions/services/proxy/agentcp/samples/qwen3_tools/qwen3_tools.py +98 -0
  177. package/extensions/services/proxy/agentcp/samples/search/create_profile_qwen.py +125 -0
  178. package/extensions/services/proxy/agentcp/samples/search/create_profile_search.py +125 -0
  179. package/extensions/services/proxy/agentcp/samples/search/qwen_agent.py +136 -0
  180. package/extensions/services/proxy/agentcp/samples/search/search_agent.py +170 -0
  181. package/extensions/services/proxy/agentcp/samples/wrapper_agently_to_agent/README.md +89 -0
  182. package/extensions/services/proxy/agentcp/samples/wrapper_agently_to_agent/create_profile.py +125 -0
  183. package/extensions/services/proxy/agentcp/samples/wrapper_agently_to_agent/main.py +44 -0
  184. package/extensions/services/proxy/agentcp/utils/__init__.py +15 -0
  185. package/extensions/services/proxy/agentcp/utils/file_util.py +117 -0
  186. package/extensions/services/proxy/agentcp/utils/proxy_bypass.py +99 -0
  187. package/extensions/services/proxy/agentcp/workflow.py +203 -0
  188. package/extensions/services/proxy/console_auth.py +109 -0
  189. package/extensions/services/proxy/evol/__init__.py +1 -0
  190. package/extensions/services/proxy/evol/config.py +37 -0
  191. package/extensions/services/proxy/evol/http/__init__.py +1 -0
  192. package/extensions/services/proxy/evol/http/async_http.py +551 -0
  193. package/extensions/services/proxy/evol/log.py +28 -0
  194. package/extensions/services/proxy/evol/presenter/__init__.py +2 -0
  195. package/extensions/services/proxy/evol/presenter/agentIdPresenter.py +1031 -0
  196. package/extensions/services/proxy/evol/presenter/apikeyPresenter.py +106 -0
  197. package/extensions/services/proxy/evol/presenter/configPresenter.py +1281 -0
  198. package/extensions/services/proxy/evol/presenter/userPresenter.py +477 -0
  199. package/extensions/services/proxy/evol/server/__init__.py +1 -0
  200. package/extensions/services/proxy/evol/server/claude_proxy_async.py +3430 -0
  201. package/extensions/services/proxy/evol/server/openclaw_proxy.py +1861 -0
  202. package/extensions/services/proxy/evol/server/proxy_config.py +15 -0
  203. package/extensions/services/proxy/evol/server/proxy_engine.py +501 -0
  204. package/extensions/services/proxy/evol/version.py +24 -0
  205. package/extensions/services/proxy/logs/websocket.log +260 -0
  206. package/extensions/services/proxy/main.py +240 -0
  207. package/extensions/services/proxy/requirements.txt +13 -0
  208. package/extensions/services/proxy/server.py +271 -0
  209. package/extensions/services/watchdog/entry.py +42 -16
  210. package/extensions/services/watchdog/module.md +1 -0
  211. package/extensions/services/watchdog/monitor.py +34 -4
  212. package/extensions/services/web/module.md +1 -1
  213. package/extensions/services/web/server.py +30 -18
  214. package/extensions/services/web/static/js/token-manager.js +10 -10
  215. package/kernel/entry.py +1 -1
  216. package/kernel/module.md +25 -1
  217. package/kernel/registry_store.py +2 -26
  218. package/kernel/rpc_router.py +36 -10
  219. package/kernel/server.py +106 -17
  220. package/kite_cli/commands/deps_install.py +67 -0
  221. package/kite_cli/commands/env_check.py +45 -0
  222. package/kite_cli/commands/prepare.py +49 -0
  223. package/kite_cli/commands/venv_setup.py +56 -0
  224. package/kite_cli/main.py +29 -1
  225. package/launcher/entry.py +306 -21
  226. package/launcher/module.md +9 -0
  227. package/launcher/module_scanner.py +11 -1
  228. package/main.py +4 -1
  229. package/package.json +8 -1
  230. package/python_version.json +4 -0
  231. package/requirements.txt +38 -0
  232. package/scripts/env-manager.js +328 -0
  233. package/scripts/python-env.js +79 -0
  234. package/scripts/scan_dependencies.py +461 -0
  235. package/scripts/setup-python-env.js +191 -0
@@ -0,0 +1,265 @@
1
+ # AgentCP UDP 心跳自动恢复修复报告
2
+
3
+ **修复日期**: 2026-01-30
4
+ **修复版本**: agentcp (python_backend)
5
+ **修复人员**: Claude Opus 4.5
6
+
7
+ ---
8
+
9
+ ## 一、问题描述
10
+
11
+ ### 1.1 现象
12
+ - 服务器端观察到客户端没有发送心跳
13
+ - 客户端重启后恢复正常
14
+ - 心跳发送在异常情况下失效(线程仍在运行但发送持续失败),且没有自动恢复机制
15
+
16
+ ### 1.2 影响范围
17
+ - UDP 心跳客户端 (`heartbeat_client.py`)
18
+ - 认证客户端 (`auth_client.py`)
19
+
20
+ ---
21
+
22
+ ## 二、问题根因分析
23
+
24
+ ### 2.1 HeartbeatClient 问题
25
+
26
+ | 问题 | 严重程度 | 描述 |
27
+ |------|---------|------|
28
+ | Socket 异常后不重建 | 🔴 严重 | `sendto()` 异常后只打印错误,不尝试重建 socket,导致心跳永久失效 |
29
+ | 401 重登录后不更新 socket | 🔴 严重 | `sign_in()` 获取新服务器信息后,UDP socket 未重建,心跳发送到错误地址 |
30
+ | 接收线程阻塞无法中断 | 🔴 严重 | `recvfrom()` 阻塞调用,socket 损坏后陷入无限异常循环 |
31
+ | `offline()` 关闭顺序错误 | 🟡 中等 | 先关 socket 再置标志位,可能导致线程访问已关闭的 socket |
32
+ | 线程没有 join() | 🟡 中等 | 未等待线程退出,可能导致资源泄漏 |
33
+ | 无心跳超时检测 | 🟡 设计缺陷 | 只发送心跳不检测响应,无法主动发现断连 |
34
+
35
+ ### 2.2 AuthClient 问题
36
+
37
+ | 问题 | 严重程度 | 描述 |
38
+ |------|---------|------|
39
+ | 重试逻辑缺陷 | 🟡 中等 | `is_retry` 标志位设置后永不重置,导致后续登录尝试直接返回 |
40
+ | 递归调用未返回结果 | 🟡 中等 | `sign_in()` 递归调用未 return,结果丢失 |
41
+ | 返回值不一致 | 🟡 中等 | 失败时返回空字符串 `""`,调用方当 dict 使用会崩溃 |
42
+ | HTTP 请求无超时 | 🟡 中等 | 可能导致线程被无限阻塞 |
43
+
44
+ ---
45
+
46
+ ## 三、修复方案
47
+
48
+ ### 3.1 HeartbeatClient 修复
49
+
50
+ #### 3.1.1 新增常量配置
51
+
52
+ ```python
53
+ class HeartbeatClient:
54
+ MAX_SEND_FAILURES = 3 # 发送失败触发重连的阈值
55
+ MAX_RECV_FAILURES = 3 # 接收失败触发重连的阈值
56
+ MAX_MISSED_HEARTBEATS = 3 # 心跳响应超时阈值(错过次数)
57
+ RECONNECT_BACKOFF_MAX = 30 # 重连退避上限(秒)
58
+ SOCKET_TIMEOUT = 1.0 # socket 超时时间(秒)
59
+ ```
60
+
61
+ #### 3.1.2 新增状态变量
62
+
63
+ ```python
64
+ self._socket_lock = threading.Lock() # 保护 socket 操作
65
+ self._reconnect_lock = threading.Lock() # 防止并发重连
66
+ self._last_reconnect_ts = 0 # 上次重连时间戳
67
+ self._last_hb_recv = 0 # 上次收到心跳响应的时间戳
68
+ self._send_failures = 0 # 连续发送失败次数
69
+ self._recv_failures = 0 # 连续接收失败次数
70
+ ```
71
+
72
+ #### 3.1.3 统一 Socket 生命周期管理
73
+
74
+ 新增方法:
75
+ - `_create_socket()`: 创建并绑定 UDP socket,设置超时
76
+ - `_close_socket()`: 安全关闭 socket
77
+ - `_reconnect(reason)`: 限流/退避后执行 sign_in + 重建 socket
78
+
79
+ #### 3.1.4 发送线程异常恢复
80
+
81
+ ```python
82
+ def __send_heartbeat(self):
83
+ backoff = 1
84
+ while self.is_sending_heartbeat and self.is_running:
85
+ try:
86
+ # 检查心跳响应超时
87
+ if self._last_hb_recv > 0:
88
+ timeout_threshold = self.MAX_MISSED_HEARTBEATS * self.heartbeat_interval
89
+ if current_time_ms - self._last_hb_recv > timeout_threshold:
90
+ self._reconnect("heartbeat_response_timeout")
91
+ continue
92
+
93
+ # 发送心跳(使用锁保护 socket)
94
+ with self._socket_lock:
95
+ if self.udp_socket is not None:
96
+ self.udp_socket.sendto(data, (self.server_ip, self.port))
97
+
98
+ self._send_failures = 0
99
+ backoff = 1
100
+
101
+ except Exception as e:
102
+ self._send_failures += 1
103
+ if self._send_failures >= self.MAX_SEND_FAILURES:
104
+ self._reconnect("send_failures_threshold")
105
+ else:
106
+ time.sleep(backoff)
107
+ backoff = min(backoff * 2, self.RECONNECT_BACKOFF_MAX)
108
+ ```
109
+
110
+ #### 3.1.5 接收线程可中断、可恢复
111
+
112
+ ```python
113
+ def _receive_messages(self):
114
+ while self.is_running:
115
+ try:
116
+ # socket 设置超时,确保能定期检查 is_running
117
+ try:
118
+ data, addr = sock.recvfrom(1536)
119
+ except socket.timeout:
120
+ continue # 超时是正常的
121
+
122
+ self._recv_failures = 0
123
+ self._last_hb_recv = current_time_ms # 更新响应时间
124
+
125
+ if hb_resp.NextBeat == 401:
126
+ self._reconnect("401_auth_failed") # 401 触发重连
127
+
128
+ except Exception as e:
129
+ self._recv_failures += 1
130
+ if self._recv_failures >= self.MAX_RECV_FAILURES:
131
+ self._reconnect("recv_failures_threshold")
132
+ ```
133
+
134
+ #### 3.1.6 修复 offline() 关闭顺序
135
+
136
+ ```python
137
+ def offline(self):
138
+ # 1. 先设置标志位
139
+ self.is_running = False
140
+ self.is_sending_heartbeat = False
141
+
142
+ # 2. 关闭 socket
143
+ self._close_socket()
144
+
145
+ # 3. 等待线程退出
146
+ if self.send_thread is not None and self.send_thread.is_alive():
147
+ self.send_thread.join(timeout=3)
148
+ if self.receive_thread is not None and self.receive_thread.is_alive():
149
+ self.receive_thread.join(timeout=3)
150
+ ```
151
+
152
+ ### 3.2 AuthClient 修复
153
+
154
+ #### 3.2.1 新增 HTTP 超时配置
155
+
156
+ ```python
157
+ HTTP_TIMEOUT = (3, 10) # (连接超时, 读取超时)
158
+ ```
159
+
160
+ #### 3.2.2 重写 sign_in() 重试逻辑
161
+
162
+ ```python
163
+ def sign_in(self, max_retry_num: int = 10) -> Union[dict, None]:
164
+ """登录方法,使用循环重试,失败返回 None"""
165
+ for retry_count in range(max_retry_num + 1):
166
+ try:
167
+ if retry_count > 0:
168
+ backoff = min(2 * retry_count, 30) # 线性退避(每次重试增加 2 秒),上限 30 秒
169
+ time.sleep(backoff)
170
+
171
+ response = requests.post(url, ..., timeout=self.HTTP_TIMEOUT)
172
+
173
+ if response.status_code == 200:
174
+ # ... 处理成功响应 ...
175
+ return result
176
+
177
+ except Exception as e:
178
+ log_warning(f"Sign in exception (retry {retry_count}/{max_retry_num}): {e}")
179
+
180
+ log_error(f"Sign in failed after {max_retry_num} retries")
181
+ return None # 统一返回 None,不再返回空字符串
182
+ ```
183
+
184
+ #### 3.2.3 所有 HTTP 请求添加超时
185
+
186
+ - `sign_in()`: 添加 `timeout=self.HTTP_TIMEOUT`
187
+ - `sign_out()`: 添加 `timeout=self.HTTP_TIMEOUT`
188
+ - `__check_server_cert()`: 添加 `timeout=self.HTTP_TIMEOUT`
189
+
190
+ ---
191
+
192
+ ## 四、修改文件清单
193
+
194
+ | 文件路径 | 修改类型 | 说明 |
195
+ |---------|---------|------|
196
+ | `agentcp/heartbeat/heartbeat_client.py` | 重构 | 添加自动恢复机制 |
197
+ | `agentcp/base/auth_client.py` | 重构 | 修复重试逻辑 |
198
+
199
+ ---
200
+
201
+ ## 五、修复效果
202
+
203
+ ### 5.1 解决的问题
204
+
205
+ | 场景 | 修复前 | 修复后 |
206
+ |------|-------|-------|
207
+ | 网络短暂中断 | ❌ 心跳永久失效 | ✅ 自动重连恢复 |
208
+ | 服务器重启(401) | ❌ 心跳发送到错误地址 | ✅ 重建 socket 恢复 |
209
+ | 长时间无响应 | ❌ 无法检测 | ✅ 超时检测触发重连 |
210
+ | 登录失败 | ❌ 一次失败后永久失败 | ✅ 循环重试(最多 `max_retry_num` 次),仍失败则返回 `None` |
211
+ | 调用 offline() | ❌ 可能异常/资源泄漏 | ✅ 安全关闭 |
212
+
213
+ ### 5.2 新增能力
214
+
215
+ 1. **心跳响应超时检测**: 连续 3 次心跳周期无响应,自动触发重连
216
+ 2. **发送/接收失败计数**: 连续 3 次失败触发重连
217
+ 3. **指数退避重试**: 避免频繁重连对服务器造成压力
218
+ 4. **重连限流**: 距离上次重连至少间隔 5 秒
219
+ 5. **线程安全**: 使用锁保护 socket 操作
220
+
221
+ ---
222
+
223
+ ## 六、测试建议
224
+
225
+ ### 6.1 功能测试
226
+
227
+ 1. **正常心跳**: 启动后确认心跳持续发送并收到响应
228
+ 2. **网络中断恢复**: 断网 30 秒后恢复,观察是否自动重连
229
+ 3. **服务器重启**: 服务端重启返回 401,确认客户端自动重新登录
230
+ 4. **offline/online 循环**: 重复调用,确认无资源泄漏
231
+
232
+ ### 6.2 异常测试
233
+
234
+ 1. **模拟 socket 异常**: 确认触发重连
235
+ 2. **模拟登录失败**: 确认重试机制正常工作
236
+ 3. **模拟长时间无响应**: 确认超时检测触发重连
237
+
238
+ ---
239
+
240
+ ## 七、回滚策略
241
+
242
+ 如需回滚,可按以下步骤操作:
243
+
244
+ 1. 使用 git 恢复修改前的版本:
245
+ ```bash
246
+ git checkout <commit-hash> -- python_backend/agentcp/heartbeat/heartbeat_client.py
247
+ git checkout <commit-hash> -- python_backend/agentcp/base/auth_client.py
248
+ ```
249
+
250
+ 2. 或者仅回滚部分修改:
251
+ - 保留 `AuthClient` 的 HTTP 超时修复
252
+ - 回滚 `HeartbeatClient` 的重连逻辑
253
+
254
+ ---
255
+
256
+ ## 八、后续优化建议
257
+
258
+ 1. **添加监控指标**: 记录重连次数、失败次数等,便于运维监控
259
+ 2. **配置化参数**: 将重连阈值、超时时间等参数配置化
260
+ 3. **健康检查接口**: 提供 API 查询心跳状态
261
+ 4. **日志分级**: 区分 DEBUG/INFO/WARNING/ERROR 日志级别
262
+
263
+ ---
264
+
265
+ *报告生成时间: 2026-01-30 08:07:15 UTC*
@@ -0,0 +1,291 @@
1
+ # AgentCP UDP 心跳断开后无法自动恢复的问题分析报告
2
+
3
+ ## 一、问题现象
4
+
5
+ 服务器端观察到客户端没有发送心跳,但客户端重启后恢复正常。说明心跳发送在某种异常情况下失效了(线程未必退出,但心跳实际发不出去),且没有自动恢复机制。
6
+
7
+ ---
8
+
9
+ ## 二、代码审查发现的问题
10
+
11
+ ### 问题 1:UDP Socket 异常后心跳静默失败且无法恢复(严重)
12
+
13
+ **文件**: `heartbeat_client.py:71-94`
14
+
15
+ ```python
16
+ def __send_heartbeat(self):
17
+ while self.is_sending_heartbeat and self.is_running:
18
+ try:
19
+ # ... 发送心跳 ...
20
+ self.udp_socket.sendto(data, (self.server_ip, self.port))
21
+ except Exception as e:
22
+ print(f"Heartbeat send error: {e}")
23
+ ErrorContext.publish(...)
24
+ # ❌ 问题:异常后没有任何恢复措施,只是打印错误继续循环
25
+ ```
26
+
27
+ **问题分析**:
28
+ - 如果 `self.udp_socket` 变成 `None` 或被关闭,`sendto()` 会抛出异常
29
+ - 异常被捕获后只是打印错误,**没有尝试重建 socket**
30
+ - 如果 socket 损坏,后续所有心跳发送都会失败,但线程不会退出也不会恢复
31
+
32
+ ---
33
+
34
+ ### 问题 2:接收线程异常可能导致 socket 状态不一致(严重)
35
+
36
+ **文件**: `heartbeat_client.py:96-133`
37
+
38
+ ```python
39
+ def _receive_messages(self):
40
+ while self.is_running:
41
+ try:
42
+ data, addr = self.udp_socket.recvfrom(1536) # ❌ 阻塞调用
43
+ # ... 处理响应 ...
44
+ except Exception as e:
45
+ print(f"Receive message exception: {e}")
46
+ time.sleep(1.5)
47
+ # ❌ 问题:没有检查 socket 是否仍然有效
48
+ ```
49
+
50
+ **问题分析**:
51
+ - `recvfrom()` 是阻塞调用,如果 socket 被关闭会抛出异常
52
+ - 异常后只是 sleep 1.5 秒继续,**没有检查 socket 状态**
53
+ - 如果 socket 已损坏,会陷入无限的异常-sleep-异常循环
54
+
55
+ ---
56
+
57
+ ### 问题 3:401 重新登录后没有更新 socket 连接信息(严重)
58
+
59
+ **文件**: `heartbeat_client.py:105-108`
60
+
61
+ ```python
62
+ if hb_resp.NextBeat == 401:
63
+ print(f"Heartbeat failed: {hb_resp.NextBeat}, try sign in again")
64
+ ErrorContext.publish(...)
65
+ self.sign_in() # ❌ 只是重新登录,没有重建 socket
66
+ ```
67
+
68
+ **问题分析**:
69
+ - `sign_in()` 会获取新的 `server_ip`、`port`、`sign_cookie`
70
+ - 但是 **UDP socket 没有重新绑定到新的服务器地址**
71
+ - 如果服务器 IP/端口变了,心跳会发送到错误的地址
72
+
73
+ ---
74
+
75
+ ### 问题 4:`offline()` 关闭 socket 后状态不一致(中等)
76
+
77
+ **文件**: `heartbeat_client.py:157-162`
78
+
79
+ ```python
80
+ def offline(self):
81
+ """停止心跳"""
82
+ if self.udp_socket is not None:
83
+ self.udp_socket.close() # ❌ 关闭 socket
84
+ self.is_running = False # ❌ 设置标志位在后面
85
+ ```
86
+
87
+ **问题分析**:
88
+ - 先关闭 socket,再设置 `is_running = False`
89
+ - 在这个时间窗口内,发送/接收线程可能还在运行,会访问已关闭的 socket
90
+ - 应该先设置标志位,等线程退出后再关闭 socket
91
+
92
+ ---
93
+
94
+ ### 问题 5:线程没有等待退出(中等)
95
+
96
+ **文件**: `heartbeat_client.py:157-162`
97
+
98
+ ```python
99
+ def offline(self):
100
+ if self.udp_socket is not None:
101
+ self.udp_socket.close()
102
+ self.is_running = False
103
+ # ❌ 没有 join() 等待线程退出
104
+ ```
105
+
106
+ **问题分析**:
107
+ - 没有调用 `self.send_thread.join()` 和 `self.receive_thread.join()`
108
+ - 线程可能还在运行时就返回了,导致资源泄漏或状态不一致
109
+
110
+ ---
111
+
112
+ ### 问题 6:`auth_client.sign_in()` 重试逻辑有缺陷(中等)
113
+
114
+ **文件**: `auth_client.py:129-136`
115
+
116
+ ```python
117
+ except Exception as e:
118
+ log_error(f"链接建立失败,正在重试")
119
+ if self.is_retry == False: # ❌ 只有第一次异常才重试
120
+ log_error("重试登录失败,6s后尝试重新连接")
121
+ self.is_retry = True
122
+ time.sleep(6)
123
+ self.sign_in(retry_count+1, max_retry_num)
124
+ # ❌ 如果 is_retry == True,直接返回 None,不再重试
125
+ ```
126
+
127
+ **问题分析**:
128
+ - `is_retry` 标志位设置后永远不会重置为 `False`
129
+ - 第一次重试失败后,后续所有登录尝试都会直接返回 `None`
130
+ - 这会导致心跳客户端拿不到有效的服务器信息
131
+
132
+ ---
133
+
134
+ ### 问题 7:没有心跳超时检测机制(设计缺陷)
135
+
136
+ **问题分析**:
137
+ - 当前实现只是定时发送心跳,**不检测服务器是否响应**
138
+ - 如果网络断开,客户端会一直发送心跳但收不到响应
139
+ - 没有机制检测"连续 N 次没收到响应"然后触发重连
140
+
141
+ ---
142
+
143
+ ## 三、问题根因总结
144
+
145
+ | 问题 | 严重程度 | 影响 |
146
+ |------|---------|------|
147
+ | Socket 异常后不重建 | 🔴 严重 | 心跳永久失效 |
148
+ | 401 重登录后不更新 socket | 🔴 严重 | 心跳发送到错误地址 |
149
+ | 接收线程异常后不检查 socket | 🔴 严重 | 无限异常循环 |
150
+ | offline() 关闭顺序错误 | 🟡 中等 | 可能导致异常 |
151
+ | 线程没有 join() | 🟡 中等 | 资源泄漏 |
152
+ | auth_client 重试逻辑缺陷 | 🟡 中等 | 登录失败后无法恢复 |
153
+ | 无心跳超时检测 | 🟡 设计缺陷 | 无法主动发现断连 |
154
+
155
+ ---
156
+
157
+ ## 四、最可能的故障场景
158
+
159
+ 根据"服务器看不到心跳,重启客户端后正常"的现象,最可能的原因是:
160
+
161
+ ### 场景 A:网络波动导致 socket 异常
162
+ 1. 网络短暂中断
163
+ 2. `sendto()` 或 `recvfrom()` 抛出异常
164
+ 3. 异常被捕获,但 socket 可能已损坏
165
+ 4. 后续心跳发送全部失败,但线程继续运行(静默失败)
166
+
167
+ ### 场景 B:服务器重启返回 401
168
+ 1. 服务器重启
169
+ 2. 客户端收到 401 响应
170
+ 3. 调用 `sign_in()` 获取新的服务器信息
171
+ 4. **但 UDP socket 仍然绑定到旧地址**
172
+ 5. 心跳发送到错误的地址
173
+
174
+ ### 场景 C:登录重试失败后放弃
175
+ 1. 网络异常导致登录失败
176
+ 2. `auth_client.sign_in()` 重试一次后设置 `is_retry = True`
177
+ 3. 后续所有登录尝试直接返回 `None`
178
+ 4. 心跳客户端拿不到有效的服务器信息
179
+
180
+ ---
181
+
182
+ ## 五、建议修复方向
183
+
184
+ ### 5.1 添加 socket 健康检查和重建机制
185
+
186
+ ```python
187
+ def _rebuild_socket(self):
188
+ """重建 UDP socket"""
189
+ try:
190
+ if self.udp_socket:
191
+ self.udp_socket.close()
192
+ except:
193
+ pass
194
+ self.udp_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
195
+ self.udp_socket.bind((self.local_ip, 0))
196
+ self.local_ip, self.local_port = self.udp_socket.getsockname()
197
+ ```
198
+
199
+ ### 5.2 401 重登录后重新创建 UDP socket
200
+
201
+ ```python
202
+ if hb_resp.NextBeat == 401:
203
+ self.sign_in()
204
+ self._rebuild_socket() # 重建 socket
205
+ ```
206
+
207
+ ### 5.3 添加心跳响应超时检测
208
+
209
+ ```python
210
+ def __send_heartbeat(self):
211
+ consecutive_failures = 0
212
+ MAX_FAILURES = 5
213
+
214
+ while self.is_sending_heartbeat and self.is_running:
215
+ try:
216
+ # 检查是否长时间没收到响应
217
+ if time.time() - self.last_response_time > 30:
218
+ consecutive_failures += 1
219
+ if consecutive_failures >= MAX_FAILURES:
220
+ log_error("心跳超时,尝试重连")
221
+ self.sign_in()
222
+ self._rebuild_socket()
223
+ consecutive_failures = 0
224
+ # ... 发送心跳 ...
225
+ except Exception as e:
226
+ # 异常处理
227
+ ```
228
+
229
+ ### 5.4 修复 `offline()` 的关闭顺序
230
+
231
+ ```python
232
+ def offline(self):
233
+ """停止心跳"""
234
+ # 1. 先设置标志位
235
+ self.is_running = False
236
+ self.is_sending_heartbeat = False
237
+
238
+ # 2. 等待线程退出
239
+ if self.send_thread and self.send_thread.is_alive():
240
+ self.send_thread.join(timeout=3)
241
+ if self.receive_thread and self.receive_thread.is_alive():
242
+ self.receive_thread.join(timeout=3)
243
+
244
+ # 3. 最后关闭 socket
245
+ if self.udp_socket:
246
+ self.udp_socket.close()
247
+ self.udp_socket = None
248
+ ```
249
+
250
+ ### 5.5 修复 `auth_client` 的重试逻辑
251
+
252
+ ```python
253
+ def sign_in(self, retry_count=0, max_retry_num=10) -> Union[dict, None]:
254
+ try:
255
+ # ... 登录逻辑 ...
256
+ except Exception as e:
257
+ log_error(f"链接建立失败,正在重试 ({retry_count}/{max_retry_num})")
258
+ if retry_count < max_retry_num:
259
+ time.sleep(min(6 * (retry_count + 1), 30)) # 线性退避(每次重试增加 6 秒),上限 30 秒
260
+ return self.sign_in(retry_count + 1, max_retry_num)
261
+ else:
262
+ log_error("重试登录失败,已达最大重试次数")
263
+ return None
264
+ ```
265
+
266
+ ---
267
+
268
+ ## 六、相关文件清单
269
+
270
+ | 文件路径 | 说明 |
271
+ |---------|------|
272
+ | `agentcp/heartbeat/heartbeat_client.py` | UDP 心跳客户端主文件 |
273
+ | `agentcp/base/auth_client.py` | 认证客户端 |
274
+ | `agentcp/agentcp.py` | AgentID 主入口 |
275
+ | `agentcp/context/context.py` | 错误上下文处理 |
276
+
277
+ ---
278
+
279
+ ## 七、总结
280
+
281
+ 当前 UDP 心跳实现存在多个严重的异常恢复缺陷,主要问题是:
282
+
283
+ 1. **Socket 异常后不重建** - 导致心跳永久失效
284
+ 2. **401 重登录后不更新连接** - 导致心跳发送到错误地址
285
+ 3. **缺少心跳超时检测** - 无法主动发现断连
286
+
287
+ 建议按照第五节的修复方向进行改进,增强心跳机制的健壮性和自动恢复能力。
288
+
289
+ ---
290
+
291
+ *报告生成时间: 2026-01-30*
@@ -0,0 +1,16 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright 2025 AgentUnion Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from . import file_client