@agentunion/kite 1.3.2 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (293) hide show
  1. package/CHANGELOG.md +302 -0
  2. package/cli.js +119 -4
  3. package/core/dependency_checker.py +250 -0
  4. package/core/env_checker.py +490 -0
  5. package/dependencies_lock.json +128 -0
  6. package/extensions/agents/assistant/entry.py +111 -1
  7. package/extensions/agents/assistant/server.py +279 -215
  8. package/extensions/channels/acp_channel/entry.py +111 -1
  9. package/extensions/channels/acp_channel/module.md +23 -22
  10. package/extensions/channels/acp_channel/server.py +279 -215
  11. package/extensions/event_hub_bench/entry.py +107 -1
  12. package/extensions/services/backup/entry.py +306 -21
  13. package/extensions/services/backup/module.md +24 -22
  14. package/extensions/services/evol/auth_manager.py +443 -0
  15. package/extensions/services/evol/config.yaml +149 -0
  16. package/extensions/services/evol/config_loader.py +117 -0
  17. package/extensions/services/evol/entry.py +406 -0
  18. package/extensions/services/evol/evol_api.py +173 -0
  19. package/extensions/services/evol/evol_config.json5 +29 -0
  20. package/extensions/services/evol/migrate_tokens.py +122 -0
  21. package/extensions/services/evol/module.md +32 -0
  22. package/extensions/services/evol/pairing.py +250 -0
  23. package/extensions/services/evol/pairing_codes.jsonl +1 -0
  24. package/extensions/services/evol/relay.py +682 -0
  25. package/extensions/services/evol/relay_config.json5 +67 -0
  26. package/extensions/services/evol/routes/__init__.py +1 -0
  27. package/extensions/services/evol/routes/routes_management_ws.py +127 -0
  28. package/extensions/services/evol/routes/routes_rpc.py +89 -0
  29. package/extensions/services/evol/routes/routes_test.py +61 -0
  30. package/extensions/services/evol/server.py +875 -0
  31. package/extensions/services/evol/static/css/style.css +1200 -0
  32. package/extensions/services/evol/static/index.html +781 -0
  33. package/extensions/services/evol/static/index_evol.html +14 -0
  34. package/extensions/services/evol/static/js/app.js +6304 -0
  35. package/extensions/services/evol/static/js/auth.js +326 -0
  36. package/extensions/services/evol/static/js/dialog.js +285 -0
  37. package/extensions/services/evol/static/js/evol-app-fixed.js +50 -0
  38. package/extensions/services/evol/static/js/evol-app.js +1949 -0
  39. package/extensions/services/evol/static/js/evol-app.js.bak +1800 -0
  40. package/extensions/services/evol/static/js/kernel-client-example.js +228 -0
  41. package/extensions/services/evol/static/js/kernel-client.js +396 -0
  42. package/extensions/services/evol/static/js/main.js +141 -0
  43. package/extensions/services/evol/static/js/registry-tests.js +585 -0
  44. package/extensions/services/evol/static/js/stats.js +217 -0
  45. package/extensions/services/evol/static/js/token-manager.js +175 -0
  46. package/extensions/services/evol/static/pairing.html +248 -0
  47. package/extensions/services/evol/static/test_registry.html +262 -0
  48. package/extensions/services/evol/static/test_relay.html +462 -0
  49. package/extensions/services/evol/stats_manager.py +240 -0
  50. package/extensions/services/model_service/entry.py +167 -19
  51. package/extensions/services/model_service/module.md +21 -22
  52. package/extensions/services/proxy/.claude/settings.local.json +13 -0
  53. package/extensions/services/proxy/CHANGELOG_20260308.md +258 -0
  54. package/extensions/services/proxy/_fix_prints.py +133 -0
  55. package/extensions/services/proxy/_fix_prints2.py +87 -0
  56. package/extensions/services/proxy/agentcp/LICENCE +178 -0
  57. package/extensions/services/proxy/agentcp/README copy.md +85 -0
  58. package/extensions/services/proxy/agentcp/README.md +260 -0
  59. package/extensions/services/proxy/agentcp/__init__.py +16 -0
  60. package/extensions/services/proxy/agentcp/agent.py +4 -0
  61. package/extensions/services/proxy/agentcp/agentcp.py +2494 -0
  62. package/extensions/services/proxy/agentcp/agentprofile.json +89 -0
  63. package/extensions/services/proxy/agentcp/ap/__init__.py +16 -0
  64. package/extensions/services/proxy/agentcp/ap/ap_client.py +316 -0
  65. package/extensions/services/proxy/agentcp/assets/images/wechat_qr.png +0 -0
  66. package/extensions/services/proxy/agentcp/backup/metrics.json +31 -0
  67. package/extensions/services/proxy/agentcp/base/__init__.py +20 -0
  68. package/extensions/services/proxy/agentcp/base/auth_client.py +257 -0
  69. package/extensions/services/proxy/agentcp/base/client.py +112 -0
  70. package/extensions/services/proxy/agentcp/base/env.py +34 -0
  71. package/extensions/services/proxy/agentcp/base/html_util.py +336 -0
  72. package/extensions/services/proxy/agentcp/base/log.py +98 -0
  73. package/extensions/services/proxy/agentcp/ca/__init__.py +17 -0
  74. package/extensions/services/proxy/agentcp/ca/ca_client.py +414 -0
  75. package/extensions/services/proxy/agentcp/ca/ca_root.py +74 -0
  76. package/extensions/services/proxy/agentcp/context/__init__.py +20 -0
  77. package/extensions/services/proxy/agentcp/context/context.py +73 -0
  78. package/extensions/services/proxy/agentcp/context/exceptions.py +114 -0
  79. package/extensions/services/proxy/agentcp/create_profile.py +125 -0
  80. package/extensions/services/proxy/agentcp/create_profile_weather.py +125 -0
  81. package/extensions/services/proxy/agentcp/db/__init__.py +15 -0
  82. package/extensions/services/proxy/agentcp/db/db_mananger.py +550 -0
  83. package/extensions/services/proxy/agentcp/docs/UDP_HEARTBEAT_FIX_REPORT.md +265 -0
  84. package/extensions/services/proxy/agentcp/docs/heartbeat_issue_analysis.md +291 -0
  85. package/extensions/services/proxy/agentcp/file/__init__.py +16 -0
  86. package/extensions/services/proxy/agentcp/file/file_client.py +141 -0
  87. package/extensions/services/proxy/agentcp/file/wss_binary_message.py +137 -0
  88. package/extensions/services/proxy/agentcp/hcp.py +299 -0
  89. package/extensions/services/proxy/agentcp/heartbeat/__init__.py +16 -0
  90. package/extensions/services/proxy/agentcp/heartbeat/heartbeat_client.py +360 -0
  91. package/extensions/services/proxy/agentcp/improved_scheduler.py +498 -0
  92. package/extensions/services/proxy/agentcp/llm_agent_utils.py +249 -0
  93. package/extensions/services/proxy/agentcp/llm_server.py +172 -0
  94. package/extensions/services/proxy/agentcp/mermaid.py +210 -0
  95. package/extensions/services/proxy/agentcp/message.py +149 -0
  96. package/extensions/services/proxy/agentcp/metrics.py +256 -0
  97. package/extensions/services/proxy/agentcp/monitoring/__init__.py +20 -0
  98. package/extensions/services/proxy/agentcp/monitoring/global_monitor.py +27 -0
  99. package/extensions/services/proxy/agentcp/monitoring/metrics_store.py +325 -0
  100. package/extensions/services/proxy/agentcp/monitoring/monitoring_service.py +269 -0
  101. package/extensions/services/proxy/agentcp/monitoring/sliding_window.py +222 -0
  102. package/extensions/services/proxy/agentcp/monitoring/standalone_reader.py +224 -0
  103. package/extensions/services/proxy/agentcp/msg/__init__.py +21 -0
  104. package/extensions/services/proxy/agentcp/msg/connection_manager.py +456 -0
  105. package/extensions/services/proxy/agentcp/msg/message_client.py +2058 -0
  106. package/extensions/services/proxy/agentcp/msg/message_serialize.py +263 -0
  107. package/extensions/services/proxy/agentcp/msg/open_ai_message.py +88 -0
  108. package/extensions/services/proxy/agentcp/msg/session_manager.py +1062 -0
  109. package/extensions/services/proxy/agentcp/msg/stream_client.py +267 -0
  110. package/extensions/services/proxy/agentcp/msg/websocket_file_receiver.py +89 -0
  111. package/extensions/services/proxy/agentcp/msg/ws_logger.py +685 -0
  112. package/extensions/services/proxy/agentcp/msg/wss_binary_message.py +137 -0
  113. package/extensions/services/proxy/agentcp/requirements.txt +7 -0
  114. package/extensions/services/proxy/agentcp/samples/agent_graph/README.md +37 -0
  115. package/extensions/services/proxy/agentcp/samples/agent_graph/agentprofile.json +89 -0
  116. package/extensions/services/proxy/agentcp/samples/agent_graph/create_profile.py +138 -0
  117. package/extensions/services/proxy/agentcp/samples/agent_graph/main.py +164 -0
  118. package/extensions/services/proxy/agentcp/samples/agent_use/create_profile.py +123 -0
  119. package/extensions/services/proxy/agentcp/samples/agent_use/llm/create_profile.py +129 -0
  120. package/extensions/services/proxy/agentcp/samples/agent_use/llm/env.json +5 -0
  121. package/extensions/services/proxy/agentcp/samples/agent_use/llm/main.py +146 -0
  122. package/extensions/services/proxy/agentcp/samples/agent_use/main.py +123 -0
  123. package/extensions/services/proxy/agentcp/samples/agent_use/readme.md +379 -0
  124. package/extensions/services/proxy/agentcp/samples/agent_use/search/create_profile.py +129 -0
  125. package/extensions/services/proxy/agentcp/samples/agent_use/search/main.py +28 -0
  126. package/extensions/services/proxy/agentcp/samples/agent_use/tool/create_profile.py +129 -0
  127. package/extensions/services/proxy/agentcp/samples/agent_use/tool/main.py +20 -0
  128. package/extensions/services/proxy/agentcp/samples/ali_amap/README.md +97 -0
  129. package/extensions/services/proxy/agentcp/samples/ali_amap/amap_agent.py +88 -0
  130. package/extensions/services/proxy/agentcp/samples/ali_amap/create_profile.py +125 -0
  131. package/extensions/services/proxy/agentcp/samples/compute_agent/agent/powershell.py +228 -0
  132. package/extensions/services/proxy/agentcp/samples/compute_agent/agent/software.py +63 -0
  133. package/extensions/services/proxy/agentcp/samples/compute_agent/agent/tools.py +36 -0
  134. package/extensions/services/proxy/agentcp/samples/compute_agent/browser_user.py +41 -0
  135. package/extensions/services/proxy/agentcp/samples/deepseek/README.md +79 -0
  136. package/extensions/services/proxy/agentcp/samples/deepseek/create_profile.py +126 -0
  137. package/extensions/services/proxy/agentcp/samples/deepseek/deepseek.py +42 -0
  138. package/extensions/services/proxy/agentcp/samples/dify_chat/README.md +78 -0
  139. package/extensions/services/proxy/agentcp/samples/dify_chat/create_profile.py +126 -0
  140. package/extensions/services/proxy/agentcp/samples/dify_chat/dify_chat.py +47 -0
  141. package/extensions/services/proxy/agentcp/samples/dify_workflow/README.md +78 -0
  142. package/extensions/services/proxy/agentcp/samples/dify_workflow/create_profile.py +126 -0
  143. package/extensions/services/proxy/agentcp/samples/dify_workflow/dify_workflow.py +46 -0
  144. package/extensions/services/proxy/agentcp/samples/executor/README.md +44 -0
  145. package/extensions/services/proxy/agentcp/samples/executor/agentprofile.json +89 -0
  146. package/extensions/services/proxy/agentcp/samples/executor/create_profile.py +139 -0
  147. package/extensions/services/proxy/agentcp/samples/executor/main.py +160 -0
  148. package/extensions/services/proxy/agentcp/samples/filereader/README.md +45 -0
  149. package/extensions/services/proxy/agentcp/samples/filereader/agentprofile.json +90 -0
  150. package/extensions/services/proxy/agentcp/samples/filereader/create_profile.py +137 -0
  151. package/extensions/services/proxy/agentcp/samples/filereader/main.py +253 -0
  152. package/extensions/services/proxy/agentcp/samples/filewriter/README.md +38 -0
  153. package/extensions/services/proxy/agentcp/samples/filewriter/agentprofile.json +91 -0
  154. package/extensions/services/proxy/agentcp/samples/filewriter/create_profile.py +138 -0
  155. package/extensions/services/proxy/agentcp/samples/filewriter/main.py +289 -0
  156. package/extensions/services/proxy/agentcp/samples/hcp/README.md +85 -0
  157. package/extensions/services/proxy/agentcp/samples/hcp/acp_weather_agent.zip +0 -0
  158. package/extensions/services/proxy/agentcp/samples/hcp/create_profile.py +125 -0
  159. package/extensions/services/proxy/agentcp/samples/hcp/hcp.py +237 -0
  160. package/extensions/services/proxy/agentcp/samples/helloworld/README.md +68 -0
  161. package/extensions/services/proxy/agentcp/samples/helloworld/hello_world.py +40 -0
  162. package/extensions/services/proxy/agentcp/samples/llm_agent/MEADME.md +117 -0
  163. package/extensions/services/proxy/agentcp/samples/llm_agent/create_profile.py +125 -0
  164. package/extensions/services/proxy/agentcp/samples/llm_agent/qwen_agent.py +136 -0
  165. package/extensions/services/proxy/agentcp/samples/local_llm_agent/README.md +90 -0
  166. package/extensions/services/proxy/agentcp/samples/local_llm_agent/create_profile.py +125 -0
  167. package/extensions/services/proxy/agentcp/samples/local_llm_agent/main.py +49 -0
  168. package/extensions/services/proxy/agentcp/samples/query_llm_from_agent/README.md +55 -0
  169. package/extensions/services/proxy/agentcp/samples/query_llm_from_agent/create_profile.py +125 -0
  170. package/extensions/services/proxy/agentcp/samples/query_llm_from_agent/main.py +23 -0
  171. package/extensions/services/proxy/agentcp/samples/query_weather_api_agent/README.md +103 -0
  172. package/extensions/services/proxy/agentcp/samples/query_weather_api_agent/create_profile.py +125 -0
  173. package/extensions/services/proxy/agentcp/samples/query_weather_api_agent/main.py +69 -0
  174. package/extensions/services/proxy/agentcp/samples/query_weather_from_agent/README.md +58 -0
  175. package/extensions/services/proxy/agentcp/samples/query_weather_from_agent/create_profile.py +125 -0
  176. package/extensions/services/proxy/agentcp/samples/query_weather_from_agent/main.py +25 -0
  177. package/extensions/services/proxy/agentcp/samples/qwen3/README.md +71 -0
  178. package/extensions/services/proxy/agentcp/samples/qwen3/create_profile.py +126 -0
  179. package/extensions/services/proxy/agentcp/samples/qwen3/qwen3.py +37 -0
  180. package/extensions/services/proxy/agentcp/samples/qwen3_tools/README.md +133 -0
  181. package/extensions/services/proxy/agentcp/samples/qwen3_tools/create_profile.py +126 -0
  182. package/extensions/services/proxy/agentcp/samples/qwen3_tools/qwen3_tools.py +98 -0
  183. package/extensions/services/proxy/agentcp/samples/search/create_profile_qwen.py +125 -0
  184. package/extensions/services/proxy/agentcp/samples/search/create_profile_search.py +125 -0
  185. package/extensions/services/proxy/agentcp/samples/search/qwen_agent.py +136 -0
  186. package/extensions/services/proxy/agentcp/samples/search/search_agent.py +170 -0
  187. package/extensions/services/proxy/agentcp/samples/wrapper_agently_to_agent/README.md +89 -0
  188. package/extensions/services/proxy/agentcp/samples/wrapper_agently_to_agent/create_profile.py +125 -0
  189. package/extensions/services/proxy/agentcp/samples/wrapper_agently_to_agent/main.py +44 -0
  190. package/extensions/services/proxy/agentcp/utils/__init__.py +15 -0
  191. package/extensions/services/proxy/agentcp/utils/file_util.py +117 -0
  192. package/extensions/services/proxy/agentcp/utils/proxy_bypass.py +99 -0
  193. package/extensions/services/proxy/agentcp/workflow.py +203 -0
  194. package/extensions/services/proxy/console_auth.py +109 -0
  195. package/extensions/services/proxy/evol/__init__.py +1 -0
  196. package/extensions/services/proxy/evol/config.py +37 -0
  197. package/extensions/services/proxy/evol/http/__init__.py +1 -0
  198. package/extensions/services/proxy/evol/http/async_http.py +551 -0
  199. package/extensions/services/proxy/evol/log.py +28 -0
  200. package/extensions/services/proxy/evol/presenter/__init__.py +2 -0
  201. package/extensions/services/proxy/evol/presenter/agentIdPresenter.py +1031 -0
  202. package/extensions/services/proxy/evol/presenter/apikeyPresenter.py +106 -0
  203. package/extensions/services/proxy/evol/presenter/configPresenter.py +1281 -0
  204. package/extensions/services/proxy/evol/presenter/userPresenter.py +477 -0
  205. package/extensions/services/proxy/evol/server/__init__.py +1 -0
  206. package/extensions/services/proxy/evol/server/claude_proxy_async.py +3430 -0
  207. package/extensions/services/proxy/evol/server/openclaw_proxy.py +1861 -0
  208. package/extensions/services/proxy/evol/server/proxy_config.py +15 -0
  209. package/extensions/services/proxy/evol/server/proxy_engine.py +501 -0
  210. package/extensions/services/proxy/evol/version.py +24 -0
  211. package/extensions/services/proxy/logs/websocket.log +260 -0
  212. package/extensions/services/proxy/main.py +240 -0
  213. package/extensions/services/proxy/requirements.txt +13 -0
  214. package/extensions/services/proxy/server.py +271 -0
  215. package/extensions/services/watchdog/entry.py +215 -26
  216. package/extensions/services/watchdog/module.md +1 -0
  217. package/extensions/services/watchdog/monitor.py +178 -38
  218. package/extensions/services/web/WEBSOCKET_STATUS.md +143 -0
  219. package/extensions/services/web/config_example.py +35 -0
  220. package/extensions/services/web/config_loader.py +110 -0
  221. package/extensions/services/web/entry.py +114 -26
  222. package/extensions/services/web/module.md +35 -24
  223. package/extensions/services/web/pairing.py +250 -0
  224. package/extensions/services/web/pairing_codes.jsonl +16 -0
  225. package/extensions/services/web/relay.py +643 -0
  226. package/extensions/services/web/relay_config.json5 +67 -0
  227. package/extensions/services/web/routes/routes_management_ws.py +127 -0
  228. package/extensions/services/web/routes/routes_rpc.py +89 -0
  229. package/extensions/services/web/routes/routes_test.py +61 -0
  230. package/extensions/services/web/routes/schemas.py +0 -22
  231. package/extensions/services/web/server.py +434 -99
  232. package/extensions/services/web/static/css/style.css +67 -28
  233. package/extensions/services/web/static/index.html +234 -44
  234. package/extensions/services/web/static/js/app.js +1335 -48
  235. package/extensions/services/web/static/js/kernel-client-example.js +161 -0
  236. package/extensions/services/web/static/js/kernel-client.js +383 -0
  237. package/extensions/services/web/static/js/registry-tests.js +558 -0
  238. package/extensions/services/web/static/js/token-manager.js +175 -0
  239. package/extensions/services/web/static/pairing.html +248 -0
  240. package/extensions/services/web/static/test_registry.html +262 -0
  241. package/extensions/services/web/web_config.json5 +29 -0
  242. package/kernel/entry.py +120 -32
  243. package/kernel/event_hub.py +141 -16
  244. package/kernel/module.md +60 -33
  245. package/kernel/registry_store.py +45 -36
  246. package/kernel/rpc_router.py +152 -59
  247. package/kernel/server.py +322 -26
  248. package/kite_cli/__init__.py +3 -0
  249. package/kite_cli/__main__.py +5 -0
  250. package/kite_cli/commands/__init__.py +1 -0
  251. package/kite_cli/commands/clean.py +101 -0
  252. package/kite_cli/commands/deps_install.py +67 -0
  253. package/kite_cli/commands/doctor.py +35 -0
  254. package/kite_cli/commands/env_check.py +45 -0
  255. package/kite_cli/commands/history.py +111 -0
  256. package/kite_cli/commands/info.py +96 -0
  257. package/kite_cli/commands/install.py +313 -0
  258. package/kite_cli/commands/list.py +143 -0
  259. package/kite_cli/commands/log.py +81 -0
  260. package/kite_cli/commands/prepare.py +49 -0
  261. package/kite_cli/commands/rollback.py +88 -0
  262. package/kite_cli/commands/search.py +73 -0
  263. package/kite_cli/commands/uninstall.py +85 -0
  264. package/kite_cli/commands/update.py +118 -0
  265. package/kite_cli/commands/venv_setup.py +56 -0
  266. package/kite_cli/core/__init__.py +1 -0
  267. package/kite_cli/core/checker.py +142 -0
  268. package/kite_cli/core/dependency.py +229 -0
  269. package/kite_cli/core/downloader.py +209 -0
  270. package/kite_cli/core/install_info.py +40 -0
  271. package/kite_cli/core/tool_installer.py +397 -0
  272. package/kite_cli/core/validator.py +78 -0
  273. package/kite_cli/main.py +317 -0
  274. package/kite_cli/utils/__init__.py +1 -0
  275. package/kite_cli/utils/i18n.py +252 -0
  276. package/kite_cli/utils/interactive.py +63 -0
  277. package/kite_cli/utils/operation_log.py +77 -0
  278. package/kite_cli/utils/paths.py +34 -0
  279. package/kite_cli/utils/version.py +308 -0
  280. package/launcher/entry.py +1124 -178
  281. package/launcher/logging_setup.py +104 -0
  282. package/launcher/module.md +46 -37
  283. package/launcher/module_scanner.py +11 -1
  284. package/main.py +4 -1
  285. package/package.json +9 -1
  286. package/python_version.json +4 -0
  287. package/requirements.txt +38 -0
  288. package/scripts/env-manager.js +328 -0
  289. package/scripts/plan_manager.py +315 -0
  290. package/scripts/python-env.js +79 -0
  291. package/scripts/scan_dependencies.py +461 -0
  292. package/scripts/setup-python-env.js +191 -0
  293. package/extensions/services/web/routes/routes_modules.py +0 -249
@@ -0,0 +1,3430 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Claude API HTTP代理服务器 - FastAPI异步版本
4
+ 通过AgentCP转发请求到Claude服务
5
+ 此版本集成到FastAPI中,提供高并发能力
6
+
7
+ 重建机制:
8
+ 1. 使用 _rebuild_lock 保护全局变量修改,防止竞态条件
9
+ 2. 同步重建入口:_full_rebuild_agentcp_system()(用于 WebSocket 断开回调)
10
+ 3. 异步重建入口:force_rebuild_agentcp_system()(用于 API 调用)
11
+ 4. 30分钟冷却机制防止频繁重建
12
+ 5. 连续无响应计数机制:只有连续10次无响应才触发自动重建,其他情况直接返回错误让用户重试
13
+ """
14
+
15
+ import asyncio
16
+ import json
17
+ import time
18
+ import os,uuid
19
+ import urllib.parse
20
+ import threading
21
+ from datetime import datetime
22
+ from typing import Optional
23
+ import httpx
24
+ from fastapi import Request, Response, Header, HTTPException
25
+ from fastapi.responses import StreamingResponse, JSONResponse
26
+
27
+ from agentcp.agentcp import AgentID
28
+ from agentcp.base.log import log_error, log_info, set_log_enabled
29
+ from ..config import API_BASE_URL
30
+ from ..presenter.configPresenter import configPresenter
31
+ from ..presenter.userPresenter import userPresenter
32
+ try:
33
+ from ..presenter.apikeyPresenter import apikeyPresenter
34
+ except ImportError:
35
+ apikeyPresenter = None
36
+ from ..http.async_http import AsyncHttpClient
37
try:
    from ..presenter.creditsLockManager import credits_lock_manager
except ImportError:
    # The creditsLockManager module could not be imported: install a stub so
    # the rest of this module can still call the same three coroutines.
    class _CreditsLockManagerStub:
        """Stand-in for ``credits_lock_manager`` when the real module is missing."""

        async def check_and_reserve_credits(self, **kwargs):
            """Always report success with a fixed balance; ``kwargs`` are ignored."""
            return {"success": True, "balance": 10000, "available": 10000, "reserved": 0}

        async def confirm_credits_usage(self, **kwargs):
            """No-op; ``kwargs`` are ignored."""
            pass

        async def release_reserved_credits(self, **kwargs):
            """No-op; ``kwargs`` are ignored."""
            pass

    credits_lock_manager = _CreditsLockManagerStub()
48
+ from ..config import ENABLE_CLAUDE_PROXY_LOG
49
+ from ..version import __version__, __cmp_version__
50
# P0 fix: import the shared exception-handling helpers.
try:
    from ..utils.exception_handler import handle_exception, log_and_ignore, ExceptionCategory
except ImportError:
    # Degraded fallback used when the exception_handler module is unavailable.
    def handle_exception(e, **kwargs):
        """Print the exception (prefixed with ``kwargs['context']`` if given) and its traceback."""
        print(f"[ClaudeProxy Error] {kwargs.get('context', '')}: {e}")
        import traceback
        traceback.print_exc()

    def log_and_ignore(e, **kwargs):
        """Fallback that discards the exception without logging anything."""
        pass

    class ExceptionCategory:
        """Fallback category constants (plain strings)."""
        NETWORK = "network"
        AGENTCP = "agentcp"
        CONCURRENT = "concurrent"
65
+
66
# Initialise agentcp's logging system.
import logging
set_log_enabled(True, logging.INFO)


# Module-level state
agentId: Optional[AgentID] = None
# claude_agent_name is now read from configuration instead of being hard-coded.
_rebuild_in_progress: bool = False  # Guards against triggering a rebuild twice
_rebuild_lock = threading.Lock()  # Thread lock protecting rebuild state and the globals above/below
_last_full_agentcp_rebuild_time: float = 0  # Timestamp of the last full AgentCP rebuild
_full_agentcp_rebuild_cooldown: float = 1800  # Cooldown between full rebuilds, in seconds (30 minutes)
_agentcp_instance = None  # Keeps a reference to the AgentCP instance so it is not garbage-collected
_consecutive_no_response_count: int = 0  # Number of consecutive requests that got no response
_consecutive_no_response_threshold: int = 10  # Consecutive no-response count that triggers a rebuild
_no_response_count_lock = threading.Lock()  # Thread lock protecting the counter
82
+
83
+
84
def _increment_no_response_count() -> int:
    """
    Bump the consecutive-no-response counter and report its new value.

    The increment and the progress message both happen while holding
    ``_no_response_count_lock``.

    Returns:
        int: The counter value after the increment, or ``1`` if anything
        raised while updating or reporting it.
    """
    global _consecutive_no_response_count
    try:
        with _no_response_count_lock:
            updated = _consecutive_no_response_count + 1
            _consecutive_no_response_count = updated
            print(f"[NoResponseCounter] 连续无响应计数: {updated}/{_consecutive_no_response_threshold}")
    except Exception as exc:
        print(f"[NoResponseCounter] ❌ 增加计数异常: {exc}")
        # Fall back to 1 so the caller still sees at least one miss.
        return 1
    return updated
106
+
107
+
108
def _reset_no_response_count():
    """
    Reset the consecutive-no-response counter to zero.

    Meant to be called once a response has been received successfully.
    The reset happens under ``_no_response_count_lock``; a message is printed
    only when the counter was non-zero beforehand.

    If acquiring the lock or printing raises, the error is reported and the
    counter is still forced to zero (without the lock) so a logging problem
    can never leave a stale count behind. This function never raises an
    ``Exception`` subclass to its caller.
    """
    global _consecutive_no_response_count
    try:
        with _no_response_count_lock:
            if _consecutive_no_response_count > 0:
                print(f"[NoResponseCounter] 重置连续无响应计数 (之前: {_consecutive_no_response_count})")
            _consecutive_no_response_count = 0
    except Exception as e:
        print(f"[NoResponseCounter] ❌ 重置计数异常: {e}")
        # Best-effort forced reset. Rebinding a module global cannot raise,
        # so no further guard (and in particular no bare ``except``) is needed.
        _consecutive_no_response_count = 0
129
+
130
+
131
def _should_trigger_rebuild() -> bool:
    """
    Tell whether the consecutive-no-response counter has reached the threshold.

    The counter is read while holding ``_no_response_count_lock``.

    Returns:
        bool: ``True`` when ``_consecutive_no_response_count`` is at least
        ``_consecutive_no_response_threshold``; ``False`` otherwise, and also
        ``False`` if the check itself raised (conservative: no rebuild).
    """
    try:
        with _no_response_count_lock:
            threshold_reached = not (
                _consecutive_no_response_count < _consecutive_no_response_threshold
            )
    except Exception as exc:
        print(f"[NoResponseCounter] ❌ 判断重建条件异常: {exc}")
        # Stay conservative on errors: do not trigger a rebuild.
        return False
    return threshold_reached
149
+
150
+
151
def _clear_async_session_manager_cache():
    """
    Empty every cache held by the module-level ``async_session_manager``.

    Single entry point for cache cleanup; the rebuild functions in this
    module call it before bringing the AgentID back online.

    Caches cleared (each one independently, so a missing or broken cache
    no longer prevents the remaining ones from being cleared):
        - ``_sessions``
        - ``_session_info``
        - ``_handler_registered``
        - ``_pending_requests``
        - ``_request_result_map``
        - ``_session_handlers``   (optional, skipped silently if absent)
        - ``_request_timestamps`` (optional, skipped silently if absent)

    Does nothing except print a message when ``async_session_manager`` is
    ``None``. Errors while clearing are reported and otherwise ignored; this
    function is best-effort and does not raise for them.
    """
    manager = async_session_manager

    if manager is None:
        print("[CacheClear] AsyncSessionManager 未初始化,跳过清理")
        return

    print("[CacheClear] 🧹 清理 AsyncSessionManager 缓存...")

    def _size(attr_name):
        # Sizes are only used for the summary line, so any problem reading
        # them is reported as 0 rather than aborting the cleanup.
        try:
            return len(getattr(manager, attr_name))
        except Exception:
            return 0

    # Snapshot the sizes before anything is cleared.
    sessions_count = _size("_sessions")
    pending_count = _size("_pending_requests")
    timestamps_count = _size("_request_timestamps")

    required_caches = (
        "_sessions",
        "_session_info",
        "_handler_registered",
        "_pending_requests",
        "_request_result_map",
    )
    optional_caches = ("_session_handlers", "_request_timestamps")

    problems = []
    for cache_name in required_caches + optional_caches:
        cache = getattr(manager, cache_name, None)
        if cache is None:
            if cache_name in required_caches:
                problems.append(f"{cache_name}: missing")
            continue
        try:
            cache.clear()
        except Exception as e:
            problems.append(f"{cache_name}: {e}")

    print(f"[CacheClear] ✓ 已清理 {sessions_count} 个 session, {pending_count} 个待处理请求, {timestamps_count} 个时间戳记录")
    if problems:
        print(f"[CacheClear] ⚠️ 清理异常(已忽略): {'; '.join(problems)}")
196
+
197
+
198
def _full_rebuild_agentcp_system() -> bool:
    """
    Fully rebuild the AgentCP stack (synchronous entry point).

    Intended for synchronous callers such as a WebSocket-disconnect callback.
    Reads and writes of the module globals ``agentId`` / ``_agentcp_instance``
    and of ``userPresenter.agentId`` are done under ``_rebuild_lock``.

    Steps:
        1. Clear the AsyncSessionManager caches.
        2. If an ``agentId`` already exists, call its ``online()`` and stop
           there when it reports success.
        3. Otherwise (no ``agentId`` or ``online()`` failed) create a brand-new
           ``AgentCP`` from ``~/.evol``, load the first AID directory found
           under ``~/.evol/agentcp/AIDs`` and bring it online.

    ``_last_full_agentcp_rebuild_time`` is stamped at the start of every call,
    whether the rebuild later succeeds or fails. This function does not itself
    check the cooldown.
    NOTE(review): cooldown enforcement is presumably done by the caller — confirm.

    Returns:
        bool: ``True`` if an AgentID ended up online, ``False`` otherwise
        (including when any exception was raised; exceptions are logged and
        swallowed here).
    """
    global agentId, _last_full_agentcp_rebuild_time, _agentcp_instance

    print("[FullRebuild] 开始完全重建 AgentCP 体系...")
    log_info("[FullRebuild] 开始完全重建 AgentCP 体系")

    # Stamp the attempt up front so repeated failures are also rate-limited.
    _last_full_agentcp_rebuild_time = time.time()

    try:
        # Step 1: drop all cached session state.
        print("[FullRebuild] 步骤1: 清空 session 缓存...")
        _clear_async_session_manager_cache()

        # Step 2: try to revive the existing AgentID via online().
        with _rebuild_lock:
            current_agent = agentId

        if current_agent is not None:
            print(f"[FullRebuild] 步骤2: 使用 agentId.online() 重建: {current_agent.id}")

            # Per the original author's note, online() is expected to close all
            # old sessions, create a new SessionManager and a new MessageClient
            # with a fresh WebSocket connection — not verifiable from this file.
            current_agent.online()

            if current_agent.is_online_success:
                # Common post-online hook (listener initialisation etc.).
                # Imported lazily, as in the original code.
                from ..presenter.agentIdPresenter import evol_agentId_online
                evol_agentId_online(current_agent)

                with _rebuild_lock:
                    userPresenter.agentId = current_agent

                _register_disconnect_callback(current_agent, disable_auto_reconnect=True)

                print(f"[FullRebuild] 重建成功(通过 online): {current_agent.id}")
                log_info(f"[FullRebuild] 重建成功: {current_agent.id}")
                return True

            # online() did not succeed: fall through to the full rebuild.
            print("[FullRebuild] online() 失败,尝试完全重建...")

        # Step 3: full rebuild (no agentId, or online() failed).
        print("[FullRebuild] 步骤3: 完全重建 AgentCP...")
        from pathlib import Path
        from agentcp import AgentCP

        evol_user_dir = Path.home() / ".evol"
        agentcp_dir = evol_user_dir / "agentcp" / "AIDs"

        if not agentcp_dir.exists():
            print("[FullRebuild] 本地无 AID 数据目录")
            return False

        # NOTE(review): iterdir() order is arbitrary, so with several AIDs the
        # "first" one is not guaranteed to be stable across runs — confirm
        # whether a deterministic choice is wanted.
        aid_dirs = [d for d in agentcp_dir.iterdir() if d.is_dir()]
        if not aid_dirs:
            print("[FullRebuild] 本地无可用 AID")
            return False

        first_aid = aid_dirs[0].name
        print(f"[FullRebuild] 发现本地 AID: {first_aid}")

        # Drop every reference to the old objects. No explicit close is done
        # here: per the original note, a failed step 2 already went through
        # online(), which is expected to have closed the old sessions.
        with _rebuild_lock:
            agentId = None
            _agentcp_instance = None
            userPresenter.agentId = None

        # Give the old resources a moment to be released. Done outside the
        # lock so other threads are not blocked while sleeping.
        time.sleep(0.5)

        # Create a brand-new AgentCP instance.
        # NOTE(review): the seed password is a hard-coded literal; kept as-is
        # because changing it would alter behavior for existing local AIDs.
        new_agentcp = AgentCP(evol_user_dir, seed_password="evol_user_default_password", debug=False)
        new_agentid = new_agentcp.load_aid(first_aid)

        if new_agentid is None:
            print(f"[FullRebuild] 加载 AID 失败: {first_aid}")
            return False

        print(f"[FullRebuild] AID 加载成功: {new_agentid.id}")

        # Bring the new AgentID online.
        new_agentid.online()

        if not new_agentid.is_online_success:
            print("[FullRebuild] AgentID 上线失败")
            return False

        # Common post-online hook (listener initialisation etc.).
        from ..presenter.agentIdPresenter import evol_agentId_online
        evol_agentId_online(new_agentid)

        with _rebuild_lock:
            agentId = new_agentid
            _agentcp_instance = new_agentcp
            userPresenter.agentId = new_agentid

        _register_disconnect_callback(new_agentid, disable_auto_reconnect=True)

        print("[FullRebuild] 完全重建成功!")
        log_info(f"[FullRebuild] 完全重建成功: {new_agentid.id}")
        return True

    except Exception as e:
        print(f"[FullRebuild] 完全重建异常: {e}")
        log_error(f"[FullRebuild] 完全重建异常: {e}")
        import traceback
        traceback.print_exc()
        return False
334
+
335
+
336
async def force_rebuild_agentcp_system(bypass_cooldown: bool = True) -> dict:
    """
    Force a rebuild of the AgentCP stack (async variant, used by API calls).

    Module globals are read and written under ``_rebuild_lock`` and the
    ``_rebuild_in_progress`` flag rejects overlapping rebuilds.

    Steps:
        1. Clear the AsyncSessionManager cache.
        2. Try to bring the current ``agentId`` back online via ``online()``.
        3. If there is no agent, or it did not come online, drop the old
           references and either re-authenticate the logged-in user or load
           the first AID directory found under ``~/.evol/agentcp/AIDs``.

    Args:
        bypass_cooldown: When False, refuse to run while the previous rebuild
            is younger than ``_full_agentcp_rebuild_cooldown`` seconds.

    Returns:
        dict with the keys ``success`` (bool), ``message`` (str),
        ``agent_id`` (str or None) and ``cleanup_details`` (dict).
    """
    global agentId, _last_full_agentcp_rebuild_time, _agentcp_instance, _rebuild_in_progress

    banner = "=" * 80
    print("\n" + banner)
    print("[ForceRebuild] 开始强制重建 AgentCP 体系")
    print(banner)
    log_info("[ForceRebuild] 开始强制重建 AgentCP 体系")

    cleanup_details = {
        "session_cache_cleared": False,
        "agentid_online_success": False,
        "old_agent_id": None,
        "new_agent_id": None,
    }

    def report(success, message, agent_id):
        # Every exit path returns the same four-key payload.
        return {
            "success": success,
            "message": message,
            "agent_id": agent_id,
            "cleanup_details": cleanup_details,
        }

    # Optional cooldown gate.
    if not bypass_cooldown:
        elapsed = time.time() - _last_full_agentcp_rebuild_time
        if elapsed < _full_agentcp_rebuild_cooldown:
            minutes_left = (_full_agentcp_rebuild_cooldown - elapsed) / 60
            with _rebuild_lock:
                active_id = agentId.id if agentId else None
            return report(False, f"重建冷却中,剩余 {minutes_left:.1f} 分钟", active_id)

    # Claim the in-progress flag, or bail out if another rebuild holds it.
    with _rebuild_lock:
        if _rebuild_in_progress:
            active_id = agentId.id if agentId else None
            return report(False, "重建已在进行中,请稍后重试", active_id)
        _rebuild_in_progress = True

    try:
        # Stamp the start time up front so rapid retries hit the cooldown.
        _last_full_agentcp_rebuild_time = time.time()

        # ---- Step 1: clear the AsyncSessionManager cache ----
        print("[ForceRebuild] 步骤1: 清空 session 缓存...")
        _clear_async_session_manager_cache()
        cleanup_details["session_cache_cleared"] = True

        # ---- Step 2: re-run online() on the existing agent ----
        with _rebuild_lock:
            live_agent = agentId
            if live_agent is not None:
                cleanup_details["old_agent_id"] = live_agent.id

        if live_agent is not None:
            print(f"[ForceRebuild] 步骤2: 使用 agentId.online() 重建: {live_agent.id}")

            # online() is a blocking call, so it runs in a worker thread.
            # Per the original notes it closes the old sessions and builds a
            # new SessionManager / MessageClient.
            await asyncio.to_thread(live_agent.online)

            if not live_agent.is_online_success:
                print("[ForceRebuild] online() 失败,尝试完全重建...")
                # fall through to step 3
            else:
                # Single entry point after an AgentID comes online
                # (listener initialisation etc.).
                from ..presenter.agentIdPresenter import evol_agentId_online
                evol_agentId_online(live_agent)

                with _rebuild_lock:
                    userPresenter.agentId = live_agent
                _register_disconnect_callback(live_agent, disable_auto_reconnect=True)
                cleanup_details["new_agent_id"] = live_agent.id
                cleanup_details["agentid_online_success"] = True

                print(banner)
                print("[ForceRebuild] 重建成功(通过 online)!")
                print(f"[ForceRebuild] - AgentID: {live_agent.id}")
                print(banner + "\n")
                log_info(f"[ForceRebuild] 重建成功: {live_agent.id}")

                return report(True, "强制重建 AgentCP 体系成功", live_agent.id)

        # ---- Step 3: full rebuild (no agent, or online() failed) ----
        print("[ForceRebuild] 步骤3: 完全重建 AgentCP...")

        # If step 2 ran, online() has already attempted a rebuild (and, per the
        # original notes, closed the sessions), so nothing is closed again here.
        with _rebuild_lock:
            # Not used afterwards; it keeps the previous instance referenced
            # until this function returns, exactly as the original did.
            previous_agentcp = _agentcp_instance
            agentId = None
            _agentcp_instance = None
            userPresenter.agentId = None

        # Give resources a moment to be released.
        await asyncio.sleep(0.5)

        if userPresenter.is_logged_in():
            print("[ForceRebuild] - 用户已登录,调用 user_agent_login() 重新认证...")
            login_result = await userPresenter.user_agent_login()

            if login_result.get("status") != "success":
                failure_reason = login_result.get("error", "未知错误")
                print(f"[ForceRebuild] 重新认证失败: {failure_reason}")
            else:
                with _rebuild_lock:
                    agentId = userPresenter.agentId
                    # Mirror the presenter's AgentCP instance reference.
                    _agentcp_instance = userPresenter._agentcp_instance
                    relogged_agent = agentId
                if relogged_agent:
                    _register_disconnect_callback(relogged_agent, disable_auto_reconnect=True)
                    cleanup_details["new_agent_id"] = relogged_agent.id
                    cleanup_details["agentid_online_success"] = True

                previous_id = cleanup_details['old_agent_id']
                current_id = cleanup_details['new_agent_id']
                print(banner)
                print("[ForceRebuild] 重建成功(通过重新认证)!")
                print(f"[ForceRebuild] - 旧 AgentID: {previous_id}")
                print(f"[ForceRebuild] - 新 AgentID: {current_id}")
                print(banner + "\n")
                log_info(f"[ForceRebuild] 重建成功: {previous_id} -> {current_id}")

                return report(True, "强制重建 AgentCP 体系成功", relogged_agent.id)
        else:
            print("[ForceRebuild] - 用户未登录,尝试从本地加载 AID...")

        # NOTE(review): the excerpt this was reconstructed from carried no
        # indentation; local AID loading is treated as the shared fall-through
        # for both "not logged in" and "re-login failed" - confirm.
        from pathlib import Path
        from agentcp import AgentCP

        evol_user_dir = Path.home() / ".evol"
        aid_root = evol_user_dir / "agentcp" / "AIDs"

        if not aid_root.exists():
            return report(False, "本地无 AID 数据目录,请登录后重试", None)

        candidates = [entry for entry in aid_root.iterdir() if entry.is_dir()]
        if not candidates:
            return report(False, "本地无可用 AID,请登录后重试", None)

        first_aid = candidates[0].name
        print(f"[ForceRebuild] 发现本地 AID: {first_aid}")

        fresh_agentcp = AgentCP(
            evol_user_dir,
            seed_password="evol_user_default_password",
            debug=False,
            run_proxy=False,
        )
        fresh_agent = fresh_agentcp.load_aid(first_aid)

        if fresh_agent is None:
            return report(False, f"加载 AID 失败: {first_aid}", None)

        print(f"[ForceRebuild] AID 加载成功: {fresh_agent.id}")

        # Bring the freshly loaded agent online (blocking call, worker thread).
        await asyncio.to_thread(fresh_agent.online)

        if not fresh_agent.is_online_success:
            return report(False, "AID 上线失败", None)

        # Single entry point after an AgentID comes online.
        from ..presenter.agentIdPresenter import evol_agentId_online
        evol_agentId_online(fresh_agent)

        with _rebuild_lock:
            agentId = fresh_agent
            _agentcp_instance = fresh_agentcp
            userPresenter.agentId = fresh_agent

        _register_disconnect_callback(fresh_agent, disable_auto_reconnect=True)
        cleanup_details["new_agent_id"] = fresh_agent.id
        cleanup_details["agentid_online_success"] = True

        print(banner)
        print("[ForceRebuild] 完全重建成功!")
        print(f"[ForceRebuild] - 旧 AgentID: {cleanup_details['old_agent_id']}")
        print(f"[ForceRebuild] - 新 AgentID: {cleanup_details['new_agent_id']}")
        print(banner + "\n")
        log_info(f"[ForceRebuild] 完全重建成功: {fresh_agent.id}")

        return report(True, "强制重建 AgentCP 体系成功", fresh_agent.id)

    except Exception as e:
        import traceback
        stack_text = traceback.format_exc()
        print(f"[ForceRebuild] 强制重建异常: {e}")
        print(f"[ForceRebuild] 异常堆栈:\n{stack_text}")
        log_error(f"[ForceRebuild] 强制重建异常: {e}")

        with _rebuild_lock:
            active_id = agentId.id if agentId else None

        return report(False, f"强制重建异常: {str(e)}", active_id)
    finally:
        # Always release the in-progress flag, whatever the outcome.
        with _rebuild_lock:
            _rebuild_in_progress = False
605
+
606
+
607
def _on_websocket_disconnect(agent_id: str, server_url: str, code: int, reason: str):
    """
    WebSocket disconnect callback: start a background rebuild.

    Registered on an AgentID via ``set_disconnect_callback``. The rebuild runs
    on a daemon thread so the caller is not blocked. It is skipped when
    another rebuild is already flagged as in progress, or when the last full
    rebuild is younger than ``_full_agentcp_rebuild_cooldown`` seconds.

    Every stage is exception-guarded so a failure here never propagates into
    the code that fired the callback.

    Args:
        agent_id: Identifier of the agent whose connection dropped.
        server_url: URL of the server the socket was connected to.
        code: Close code reported for the disconnect.
        reason: Close reason reported for the disconnect.
    """
    global _rebuild_in_progress, _last_full_agentcp_rebuild_time

    def release_rebuild_flag():
        """Reset the in-progress flag, even if taking the lock fails."""
        global _rebuild_in_progress
        try:
            with _rebuild_lock:
                _rebuild_in_progress = False
        except Exception:
            _rebuild_in_progress = False

    def trigger_rebuild():
        """Thread body: honour the cooldown, then run the full rebuild."""
        try:
            print("[DisconnectCallback] 触发后台重建...")
            log_info("[DisconnectCallback] 触发后台重建...")

            # Cooldown: at most one full rebuild per cooldown window.
            elapsed = time.time() - _last_full_agentcp_rebuild_time
            if elapsed < _full_agentcp_rebuild_cooldown:
                minutes_left = (_full_agentcp_rebuild_cooldown - elapsed) / 60
                print(f"[DisconnectCallback] 完全重建冷却中,剩余 {minutes_left:.1f} 分钟")
                log_info(f"[DisconnectCallback] 完全重建冷却中,剩余 {minutes_left:.1f} 分钟")
                return

            print("[DisconnectCallback] 触发完全重建 AgentCP 体系...")
            log_info("[DisconnectCallback] 触发完全重建 AgentCP 体系")
            if _full_rebuild_agentcp_system():
                print("[DisconnectCallback] 完全重建 AgentCP 体系成功")
                log_info("[DisconnectCallback] 完全重建 AgentCP 体系成功")
            else:
                print("[DisconnectCallback] 完全重建 AgentCP 体系失败")
                log_error("[DisconnectCallback] 完全重建 AgentCP 体系失败")

        except Exception as e:
            print(f"[DisconnectCallback] 重建异常: {e}")
            log_error(f"[DisconnectCallback] 重建异常: {e}")
            try:
                import traceback
                traceback.print_exc()
            except Exception:
                # Best-effort diagnostics only. Narrowed from a bare
                # ``except:`` so SystemExit/KeyboardInterrupt are not eaten.
                pass
        finally:
            release_rebuild_flag()

    # Logging is best-effort; a formatting problem must not stop the rebuild.
    try:
        print(f"[DisconnectCallback] WebSocket 断开!")
        print(f"[DisconnectCallback] agent_id: {agent_id}")
        print(f"[DisconnectCallback] server_url: {server_url}")
        print(f"[DisconnectCallback] code: {code}")
        print(f"[DisconnectCallback] reason: {reason}")
        log_info(f"[DisconnectCallback] WebSocket 断开: agent={agent_id}, code={code}, reason={reason}")
    except Exception as log_err:
        print(f"[DisconnectCallback] 日志打印异常(不影响处理): {log_err}")

    # Claim the in-progress flag; only one rebuild may run at a time.
    try:
        with _rebuild_lock:
            if _rebuild_in_progress:
                print("[DisconnectCallback] 重建已在进行中,跳过")
                return
            _rebuild_in_progress = True
    except Exception as lock_err:
        print(f"[DisconnectCallback] 获取锁异常: {lock_err}")
        return

    try:
        # Run the rebuild off-thread so the WebSocket callback returns at once.
        rebuild_thread = threading.Thread(target=trigger_rebuild, daemon=True)
        rebuild_thread.start()
        print("[DisconnectCallback] 已启动重建线程")
    except Exception as e:
        # The thread never started, so nobody else will clear the flag.
        release_rebuild_flag()
        print(f"[DisconnectCallback] 启动重建线程失败: {e}")
        log_error(f"[DisconnectCallback] 启动重建线程失败: {e}")
698
+
699
+
700
def _register_disconnect_callback(aid: AgentID, disable_auto_reconnect: bool = True):
    """
    Attach ``_on_websocket_disconnect`` to an AgentID.

    Optionally also switches off ``auto_reconnect`` on every MessageClient
    found in ``aid.session_manager.message_client_map``, so the client's own
    reconnect logic does not compete with the rebuild triggered by the
    callback.

    Every step is exception-guarded; a failure is printed and otherwise
    ignored, so this function never raises.

    Args:
        aid: AgentID instance; ``None`` is accepted and skipped.
        disable_auto_reconnect: Whether to disable MessageClient auto-reconnect.
    """
    try:
        if aid is None:
            print("[RegisterCallback] aid 为空,跳过注册")
            return

        if hasattr(aid, 'set_disconnect_callback'):
            try:
                aid.set_disconnect_callback(_on_websocket_disconnect)
                aid_label = getattr(aid, 'id', 'unknown')
                print(f"[RegisterCallback] 已为 AgentID({aid_label}) 注册断开回调")
                log_info(f"[RegisterCallback] 已为 AgentID({aid_label}) 注册断开回调")
            except Exception as cb_err:
                print(f"[RegisterCallback] 注册断开回调失败: {cb_err}")

        if not disable_auto_reconnect:
            return

        # Turn off MessageClient auto-reconnect to avoid two competing rebuilds.
        try:
            if aid.session_manager and hasattr(aid.session_manager, 'message_client_map'):
                for server_url, mc in aid.session_manager.message_client_map.items():
                    try:
                        if mc and hasattr(mc, 'config') and mc.config and hasattr(mc.config, 'auto_reconnect'):
                            mc.config.auto_reconnect = False
                            print(f"[RegisterCallback] 已禁用 MessageClient({server_url}) 的自动重连")
                            log_info(f"[RegisterCallback] 已禁用 MessageClient({server_url}) 的自动重连")
                    except Exception as mc_err:
                        print(f"[RegisterCallback] 禁用 MessageClient({server_url}) auto_reconnect 失败: {mc_err}")
        except Exception as sm_err:
            print(f"[RegisterCallback] 访问 session_manager 异常: {sm_err}")

    except Exception as e:
        print(f"[RegisterCallback] 注册回调过程异常: {e}")
        try:
            import traceback
            traceback.print_exc()
        except Exception:
            # Best-effort diagnostics only. Narrowed from a bare ``except:``
            # so SystemExit/KeyboardInterrupt are not swallowed.
            pass
749
+
750
+
751
class ApiKeyManager:
    """API key validator backed by an in-memory copy of the key list."""

    def __init__(self):
        self._api_keys = {}  # {apiKey: {"id", "name", "status", "remark"}}; status 1 = enabled
        self._last_update_time = 0  # time.time() of the last successful refresh
        self._cache_ttl = 300  # seconds a refreshed key list stays fresh
        self._lock = asyncio.Lock()  # serialises validate_api_key calls

    @staticmethod
    def _verdict(entry: dict, from_cache: bool) -> dict:
        """Translate a cached key record into the validation result dict."""
        if entry.get("status") == 1:
            return {"valid": True, "from_cache": from_cache, "error": None}
        return {"valid": False, "from_cache": from_cache, "error": "API Key 已被禁用"}

    @staticmethod
    def _build_cache(api_key_list) -> dict:
        """
        Index the key list by its ``apiKey`` field.

        Records without an ``apiKey`` are dropped. A missing (``None``) list
        yields an empty cache instead of raising.
        """
        cache = {}
        for key_data in api_key_list or []:
            api_key = key_data.get("apiKey")
            if api_key:
                cache[api_key] = {
                    "id": key_data.get("id"),
                    "name": key_data.get("name"),
                    "status": key_data.get("status"),
                    "remark": key_data.get("remark"),
                }
        return cache

    async def validate_api_key(self, api_key: str) -> dict:
        """
        Validate an API key against the cached key list.

        A key present in a still-fresh cache is answered from the cache.
        Otherwise the list is refreshed first. If that refresh fails, a
        (possibly stale) cached record is still honoured.

        Args:
            api_key: The key to validate.

        Returns:
            dict: {"valid": bool, "from_cache": bool, "error": str or None}
        """
        async with self._lock:
            now = time.time()

            # 1) Known key and the cache has not expired: answer from cache.
            cached_key = self._api_keys.get(api_key) if api_key in self._api_keys else None
            if cached_key is not None and now - self._last_update_time < self._cache_ttl:
                if cached_key.get("status") == 1:
                    log_info(f"[ApiKeyManager] API Key 缓存命中且启用: {api_key[:20]}...")
                else:
                    log_info(f"[ApiKeyManager] API Key 缓存命中但已禁用: {api_key[:20]}...")
                return self._verdict(cached_key, from_cache=True)

            # 2) Unknown key or expired cache: fetch the list synchronously.
            log_info(f"[ApiKeyManager] API Key 缓存未命中,同步获取最新数据")
            refreshed = await self._refresh_api_keys()

            if not refreshed:
                # Refresh failed: fall back to whatever the cache still holds.
                if api_key not in self._api_keys:
                    return {"valid": False, "from_cache": False, "error": "无法验证 API Key,请稍后重试"}
                stale_key = self._api_keys[api_key]
                if stale_key.get("status") == 1:
                    log_info(f"[ApiKeyManager] API Key 刷新失败,使用缓存数据")
                return self._verdict(stale_key, from_cache=True)

            # 3) Refresh succeeded: the key must exist and be enabled.
            if api_key not in self._api_keys:
                log_info(f"[ApiKeyManager] API Key 不存在: {api_key[:20]}...")
                return {"valid": False, "from_cache": False, "error": "API Key 不存在或未启用"}

            fresh_key = self._api_keys[api_key]
            if fresh_key.get("status") == 1:
                log_info(f"[ApiKeyManager] API Key 校验通过: {api_key[:20]}...")
            else:
                log_info(f"[ApiKeyManager] API Key 已禁用: {api_key[:20]}...")
            return self._verdict(fresh_key, from_cache=False)

    async def _refresh_api_keys(self) -> bool:
        """
        Reload the key list through ``apikeyPresenter.get_api_key_list()``.

        The cache is replaced only when the call reports ``status ==
        "success"`` and the payload carries ``code == 200``.

        Returns:
            bool: True when the cache was replaced, False otherwise.
        """
        try:
            log_info(f"[ApiKeyManager] 开始刷新 API Key 列表")

            result = await apikeyPresenter.get_api_key_list()

            if not result or result.get("status") != "success":
                # ``result`` may be None here; the original called .get() on
                # it and reported the resulting AttributeError instead of the
                # real failure.
                error_msg = (result or {}).get("error", "未知错误")
                log_error(f"[ApiKeyManager] 获取 API Key 列表失败: {error_msg}")
                return False

            response_data = result.get("data") or {}
            if response_data.get("code") != 200:
                error_msg = response_data.get("msg", "未知错误")
                log_error(f"[ApiKeyManager] API 返回错误: {error_msg}")
                return False

            # Swap in a freshly built cache in one assignment.
            new_cache = self._build_cache(response_data.get("data"))
            self._api_keys = new_cache
            self._last_update_time = time.time()

            log_info(f"[ApiKeyManager] API Key 列表刷新成功,共 {len(new_cache)} 个")
            return True

        except Exception as e:
            log_error(f"[ApiKeyManager] 刷新 API Key 列表异常: {str(e)}")
            return False

    def get_cached_keys_count(self) -> int:
        """Return how many API keys are currently cached."""
        return len(self._api_keys)

    def clear_cache(self):
        """Drop all cached keys and mark the cache as never refreshed."""
        self._api_keys = {}
        self._last_update_time = 0
        log_info(f"[ApiKeyManager] API Key 缓存已清除")
871
+
872
+
873
# Module-level ApiKeyManager instance.
api_key_manager = ApiKeyManager()
875
+
876
+
877
class BalanceManager:
    """Credit-balance cache whose TTL is tiered by the remaining balance."""

    # Tiered cache rules: the first rule whose min_credits the balance reaches
    # supplies the TTL, so the list must stay ordered from high to low.
    TIERED_CACHE_RULES = [
        {"min_credits": 50000, "ttl_seconds": 14400, "description": "≥5万分: 缓存4小时"},
        {"min_credits": 30000, "ttl_seconds": 5400, "description": "3-5万分: 缓存90分钟"},
        {"min_credits": 20000, "ttl_seconds": 2700, "description": "2-3万分: 缓存45分钟"},
        {"min_credits": 10000, "ttl_seconds": 1800, "description": "1-2万分: 缓存30分钟"},
        {"min_credits": 5000, "ttl_seconds": 600, "description": "5千-1万分: 缓存10分钟"},
        {"min_credits": 2000, "ttl_seconds": 300, "description": "2千-5千分: 缓存5分钟"},
        {"min_credits": 0, "ttl_seconds": 0, "description": "<2千分: 无缓存(每次实时查询)"},
    ]

    def __init__(self):
        self._balance = None  # cached credits; None until a fetch succeeds
        self._last_update_time = 0  # timestamp of the cached value
        self._cache_ttl = 0  # TTL (seconds) chosen for the cached value
        self._lock = asyncio.Lock()  # guards the cached state
        self._is_refreshing = False  # True while a background refresh runs
        self._last_fetch_failed = False  # outcome of the most recent fetch
        self._force_sync_until = 0  # deadline for a forced-sync mode (not read in this class)
        self._users_count_on_same_device = 0  # users on the same device within 8 hours
        self._consecutive_passthrough_count = 0  # requests let through without a balance
        self._max_passthrough_count = 5  # pass-through budget before blocking
        # asyncio holds only weak references to tasks; keep strong ones here
        # so background refreshes cannot be garbage-collected mid-flight.
        self._background_tasks = set()

    @staticmethod
    def _get_cache_ttl(credits: int) -> int:
        """
        Pick the cache TTL for a credit balance.

        Args:
            credits: Credit balance.

        Returns:
            int: TTL in seconds; 0 means "do not cache".
        """
        for rule in BalanceManager.TIERED_CACHE_RULES:
            if credits >= rule["min_credits"]:
                return rule["ttl_seconds"]
        return 0  # below every tier (e.g. negative): do not cache

    @staticmethod
    def _fresh_response(balance, users_count) -> dict:
        """Build the reply for a just-fetched balance; zero credits is an error."""
        if balance == 0:
            return {"balance": 0, "from_cache": False, "error": "积分余额为0", "usersCountOnSameDeviceWithin8Hours": users_count}
        return {"balance": balance, "from_cache": False, "error": None, "usersCountOnSameDeviceWithin8Hours": users_count}

    def _ttl_description(self) -> str:
        """Human-readable form of the current TTL, used in log lines."""
        return f"{self._cache_ttl}秒" if self._cache_ttl > 0 else "无缓存"

    def _store_fetch_result(self, fetch_result: dict, timestamp: float):
        """
        Record a successful ``_fetch_balance`` result in the cache.

        The TTL is computed before anything is mutated, so a malformed
        payload cannot leave the cache half-updated.

        Args:
            fetch_result: Dict with ``credits`` and
                ``usersCountOnSameDeviceWithin8Hours``.
            timestamp: Value to record as the cache's update time.

        Returns:
            tuple: ``(balance, users_count)`` as stored.
        """
        balance = fetch_result.get("credits", 0)
        users_count = fetch_result.get("usersCountOnSameDeviceWithin8Hours", 0)
        ttl = self._get_cache_ttl(balance)

        self._balance = balance
        self._users_count_on_same_device = users_count
        self._last_update_time = timestamp
        self._cache_ttl = ttl
        self._last_fetch_failed = False
        return balance, users_count

    def _spawn_background(self, coro):
        """Schedule ``coro`` as a task and hold a reference until it finishes."""
        task = asyncio.create_task(coro)
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        return task

    def _is_cache_valid(self) -> bool:
        """
        Tell whether the cached balance may still be served.

        Returns:
            bool: False when nothing is cached, when the TTL is 0, or when
            the cached value is older than its TTL.
        """
        if self._balance is None or self._cache_ttl == 0:
            return False
        return time.time() - self._last_update_time < self._cache_ttl

    async def get_balance(self, customer_aid: str, force_sync: bool = False) -> dict:
        """
        Return the balance using the tiered caching strategy.

        Rules:
            1. Nothing cached and the fetch fails -> let the request through
               (``balance == -1``, ``service_unavailable``) and retry in the
               background.
            2. After ``_max_passthrough_count`` such pass-throughs the fetch
               becomes blocking: a failure now returns an error.
            3. With a cached value: a fresh cache is returned immediately and
               refreshed in the background; an expired one is re-fetched
               synchronously, falling back to the stale value on failure.
            4. A fetched balance of 0 is always reported with an error.

        Args:
            customer_aid: Customer AID, forwarded to the fetch helpers.
            force_sync: Accepted for interface compatibility; this
                implementation does not consult it.

        Returns:
            dict: {"balance": float, "from_cache": bool, "error": str or None,
            "usersCountOnSameDeviceWithin8Hours": int}; the pass-through reply
            additionally carries "service_unavailable": True.
        """
        async with self._lock:
            current_time = time.time()

            # Pass-through budget exhausted: the fetch must now succeed.
            if self._balance is None and self._consecutive_passthrough_count >= self._max_passthrough_count:
                log_error(f"[BalanceManager] 连续放行{self._consecutive_passthrough_count}次,进入同步拦截模式")
                fetch_result = await self._fetch_balance(customer_aid)
                if fetch_result is None:
                    # Still unavailable: block the request.
                    return {"balance": 0, "from_cache": False, "error": "余额服务不可用,请稍后重试", "usersCountOnSameDeviceWithin8Hours": 0}
                balance, users_count = self._store_fetch_result(fetch_result, current_time)
                self._consecutive_passthrough_count = 0
                return self._fresh_response(balance, users_count)

            # Nothing cached yet: try once, let the request through on failure.
            if self._balance is None:
                log_info(f"[BalanceManager] 首次获取余额 (已放行{self._consecutive_passthrough_count}次)")
                fetch_result = await self._fetch_balance(customer_aid)

                if fetch_result is None:
                    self._consecutive_passthrough_count += 1
                    self._last_fetch_failed = True
                    log_info(f"[BalanceManager] 余额获取失败,放行请求 (第{self._consecutive_passthrough_count}次),启动异步重试")
                    self._spawn_background(self._async_retry_fetch(customer_aid))
                    return {"balance": -1, "from_cache": False, "error": None, "usersCountOnSameDeviceWithin8Hours": 0, "service_unavailable": True}

                balance, users_count = self._store_fetch_result(fetch_result, current_time)
                self._consecutive_passthrough_count = 0
                log_info(f"[BalanceManager] 积分: {balance}, 缓存策略: {self._ttl_description()}")
                return self._fresh_response(balance, users_count)

            cache_age = current_time - self._last_update_time

            # Fresh cache: serve it and refresh in the background.
            if self._is_cache_valid():
                log_info(f"[BalanceManager] 缓存有效 (积分: {self._balance}, 缓存年龄: {cache_age:.0f}秒, TTL: {self._cache_ttl}秒)")
                if not self._is_refreshing:
                    self._spawn_background(self._async_refresh_balance(customer_aid))
                return {"balance": self._balance, "from_cache": True, "error": None, "usersCountOnSameDeviceWithin8Hours": self._users_count_on_same_device}

            # Expired cache: fetch synchronously, fall back to the stale value.
            log_info(f"[BalanceManager] 缓存已过期 (积分: {self._balance}, 缓存年龄: {cache_age:.0f}秒, TTL: {self._cache_ttl}秒),同步查询")
            fetch_result = await self._fetch_balance(customer_aid)

            if fetch_result is None:
                self._last_fetch_failed = True
                log_info(f"[BalanceManager] 缓存过期获取失败,使用缓存数据放行 (积分: {self._balance})")
                return {"balance": self._balance, "from_cache": True, "error": "余额服务异常,使用缓存数据", "usersCountOnSameDeviceWithin8Hours": self._users_count_on_same_device}

            balance, users_count = self._store_fetch_result(fetch_result, current_time)
            log_info(f"[BalanceManager] 积分: {balance}, 缓存策略: {self._ttl_description()}")
            return self._fresh_response(balance, users_count)

    async def _fetch_balance(self, customer_aid: str) -> Optional[dict]:
        """
        Fetch the credit balance through ``userPresenter.get_credits_balance()``.

        ``customer_aid`` is accepted but not forwarded to that call.

        Returns:
            Optional[dict]: ``{"credits", "usersCountOnSameDeviceWithin8Hours"}``
            on success; None on any failure (request error, non-200 business
            code, or exception).
        """
        try:
            print(f"[BalanceManager] 调用 userPresenter.get_credits_balance() 获取积分余额")

            result = await userPresenter.get_credits_balance()

            if result.get("status") != "success":
                # The request itself failed.
                error_msg = result.get("error", "未知错误")
                error_code = result.get("error_code", "")
                need_relogin = result.get("need_relogin", False)
                print(f"[BalanceManager] 获取积分余额失败: {error_msg}")
                if error_code:
                    print(f"[BalanceManager] 错误码: {error_code}")
                if need_relogin:
                    print(f"[BalanceManager] 需要重新登录")
                print(f"[BalanceManager] 完整结果: {result}")
                return None

            data = result.get("data", {})
            if data.get("code") != 200:
                # The API answered with a business error code.
                api_code = data.get("code", "")
                error_msg = data.get("msg", "未知错误")
                print(f"[BalanceManager] API返回错误: {error_msg} (code: {api_code})")
                print(f"[BalanceManager] 完整响应: {data}")
                return None

            inner_data = data.get("data", {})
            credits = inner_data.get("credits", 0)
            users_count = inner_data.get("usersCountOnSameDeviceWithin8Hours", 0)
            print(f"[BalanceManager] 积分余额获取成功: {credits}, 同设备用户数: {users_count}")
            return {
                "credits": credits,
                "usersCountOnSameDeviceWithin8Hours": users_count,
            }

        except Exception as e:
            import traceback
            log_error(f"[BalanceManager] 获取余额异常: {str(e)}")
            print(f"[BalanceManager] 异常堆栈:\n{traceback.format_exc()}")
            return None

    async def _async_refresh_balance(self, customer_aid: str):
        """Background task: refresh the cached balance without blocking callers."""
        try:
            self._is_refreshing = True
            log_info(f"[BalanceManager] 开始异步刷新余额")

            fetch_result = await self._fetch_balance(customer_aid)
            if fetch_result is not None:
                async with self._lock:
                    old_balance = self._balance
                    balance, _ = self._store_fetch_result(fetch_result, time.time())
                    log_info(f"[BalanceManager] 异步刷新完成,积分: {old_balance} → {balance}, 缓存策略: {self._ttl_description()}")
            else:
                async with self._lock:
                    self._last_fetch_failed = True
                log_info(f"[BalanceManager] 异步刷新失败")

        except Exception as e:
            async with self._lock:
                self._last_fetch_failed = True
            log_error(f"[BalanceManager] 异步刷新异常: {str(e)}")
        finally:
            self._is_refreshing = False

    async def _async_retry_fetch(self, customer_aid: str, max_retries: int = 3, retry_delay: float = 5.0):
        """
        Background retry used after a failed first fetch.

        Args:
            customer_aid: Customer AID.
            max_retries: Maximum number of attempts.
            retry_delay: Seconds to wait before each attempt.
        """
        for attempt in range(max_retries):
            try:
                await asyncio.sleep(retry_delay)
                log_info(f"[BalanceManager] 异步重试获取余额 (尝试 {attempt + 1}/{max_retries})")

                fetch_result = await self._fetch_balance(customer_aid)
                if fetch_result is not None:
                    async with self._lock:
                        balance, _ = self._store_fetch_result(fetch_result, time.time())
                        self._consecutive_passthrough_count = 0  # a balance is known again
                        log_info(f"[BalanceManager] 异步重试成功,积分: {balance}, 缓存策略: {self._ttl_description()}")
                    return

            except Exception as e:
                log_error(f"[BalanceManager] 异步重试失败 (尝试 {attempt + 1}/{max_retries}): {str(e)}")

        log_error(f"[BalanceManager] 异步重试全部失败,共尝试 {max_retries} 次")

    def get_cached_balance(self) -> Optional[float]:
        """Return the cached balance without triggering any refresh."""
        return self._balance

    def clear_cache(self):
        """Forget the cached balance and reset the pass-through counter."""
        self._balance = None
        self._last_update_time = 0
        self._cache_ttl = 0
        self._users_count_on_same_device = 0
        self._consecutive_passthrough_count = 0
        log_info(f"[BalanceManager] 余额缓存已清除")

    async def force_refresh(self, customer_aid: str = ""):
        """
        Re-fetch the balance and overwrite the cache.

        Per the original notes this is called by UsageConsumptionUpdater
        after credits were deducted, to bring the cache back in sync.

        The fetch result is a dict; only its ``credits`` value is stored as
        the balance (the previous code stored the whole dict, which then
        broke the TTL lookup and left a dict in ``_balance``).

        Args:
            customer_aid: Customer AID.
        """
        try:
            log_info(f"[BalanceManager] 强制刷新余额(数据库更新触发)")
            fetch_result = await self._fetch_balance(customer_aid)

            if fetch_result is None:
                log_error(f"[BalanceManager] 强制刷新失败")
                return

            async with self._lock:
                old_balance = self._balance
                balance, _ = self._store_fetch_result(fetch_result, time.time())
                log_info(f"[BalanceManager] 强制刷新完成: {old_balance} → {balance}, 缓存策略: {self._ttl_description()}")

        except Exception as e:
            log_error(f"[BalanceManager] 强制刷新异常: {str(e)}")

    def notify_credits_consumed(self, consumed_amount: float):
        """
        Apply a known deduction to the cached balance immediately.

        Per the original notes this is called once UsageConsumptionUpdater
        has confirmed the deduction. Does nothing when no balance is cached.

        Args:
            consumed_amount: Number of credits consumed.
        """
        try:
            if self._balance is None:
                return

            old_balance = self._balance
            self._balance = max(0, self._balance - consumed_amount)
            # A lower balance may fall into a tier with a shorter TTL.
            self._cache_ttl = self._get_cache_ttl(self._balance)

            log_info(f"[BalanceManager] 积分消耗通知: {old_balance} → {self._balance} (消耗: {consumed_amount}), 缓存策略: {self._ttl_description()}")
        except Exception as e:
            log_error(f"[BalanceManager] 积分消耗通知失败: {str(e)}")
1235
+
1236
+
1237
# Module-level BalanceManager instance.
balance_manager = BalanceManager()
1239
+
1240
+
1241
# ==================== Logging: reuse a single FileHandler ====================
_proxy_logger = None  # lazily created by get_proxy_logger()
_proxy_logger_lock = threading.Lock()  # guards first-time initialisation


def get_proxy_logger():
    """
    Return the shared ``claude_proxy`` logger, creating it on first use.

    The logger is built once and cached in ``_proxy_logger``, so a new
    file handler is not created per request.

    When the ``ELECTRON_IS_PACKAGED`` environment variable is ``"1"`` the
    logger gets only a ``NullHandler`` at CRITICAL level and nothing is
    written to disk. Otherwise a 20 MB x 5 ``RotatingFileHandler`` is attached
    when ``ENABLE_CLAUDE_PROXY_LOG`` is truthy.

    Returns:
        logging.Logger: The cached ``claude_proxy`` logger.
    """
    global _proxy_logger

    # Fast path: already initialised.
    if _proxy_logger is not None:
        return _proxy_logger

    # Packaged (production) builds never write proxy logs to disk.
    if os.environ.get("ELECTRON_IS_PACKAGED", "0") == "1":
        silent_logger = logging.getLogger("claude_proxy")
        silent_logger.setLevel(logging.CRITICAL)
        silent_logger.handlers.clear()
        silent_logger.addHandler(logging.NullHandler())
        silent_logger.propagate = False
        _proxy_logger = silent_logger
        return _proxy_logger

    # Slow path: initialise under the lock, re-checking once it is held.
    with _proxy_logger_lock:
        if _proxy_logger is not None:
            return _proxy_logger

        try:
            from ..utils.log_paths import get_python_log_dir
            log_dir = get_python_log_dir("proxy")
        except ImportError:
            from pathlib import Path
            log_dir = str(Path.home() / ".evol" / "logs" / "proxy")
        # Make sure the directory exists whichever branch produced it, so the
        # file handler below cannot fail on a missing directory.
        os.makedirs(log_dir, exist_ok=True)
        from logging.handlers import RotatingFileHandler

        log_file = os.path.join(log_dir, "claude_proxy.log")

        proxy_logger = logging.getLogger("claude_proxy")
        proxy_logger.setLevel(logging.DEBUG if ENABLE_CLAUDE_PROXY_LOG else logging.CRITICAL)

        # Start from a clean handler list.
        proxy_logger.handlers.clear()

        # The file handler is only created when proxy logging is enabled.
        if ENABLE_CLAUDE_PROXY_LOG:
            file_handler = RotatingFileHandler(
                log_file,
                maxBytes=20 * 1024 * 1024,  # 20MB
                backupCount=5,
                encoding='utf-8'
            )
            file_handler.setLevel(logging.DEBUG)
            file_handler.setFormatter(
                logging.Formatter(
                    '%(asctime)s - %(levelname)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S'
                )
            )
            proxy_logger.addHandler(file_handler)

            # NOTE(review): placement inside this branch is inferred - the
            # excerpt this was reconstructed from had no indentation.
            log_info(f"[ProxyLogger] 日志记录器已初始化,日志文件: {log_file}")

        _proxy_logger = proxy_logger
        return _proxy_logger
1316
+
1317
+
1318
class AsyncSessionManager:
    """
    Asynchronous session manager.

    Maps a scope key (or API key) to a session created on an AgentID object,
    registers one message handler per session, and routes incoming replies to
    waiting requests by ``trace_id``. It also runs a background task that
    drops request bookkeeping older than a TTL.
    """

    def __init__(self):
        # Short instance id, used only in diagnostic output.
        import uuid
        self._instance_id = str(uuid.uuid4())[:8]

        self._sessions = {}  # session key (scope key or api_key) -> session_id
        self._session_info = {}  # session_id -> {'created_time', 'last_used', 'api_key', 'session_key', 'target_aid'}
        self._lock = asyncio.Lock()  # serialises get_session / remove_session_*
        # Expired-session sweep (performed inside get_session): every 5 minutes.
        # Kept separate from _cleanup_interval, which belongs to the TTL task;
        # previously both shared one attribute and the later assignment (60)
        # silently replaced this value.
        self._session_cleanup_interval = 300
        self._session_timeout = 1800  # a session idle for 30 minutes is expired
        self._last_cleanup = time.time()

        # trace_id matching
        self._pending_requests = {}  # trace_id -> asyncio.Event
        self._session_handlers = {}  # session_id -> handler (kept so it can be removed later)
        self._request_result_map = {}  # trace_id -> {"result_type": str, "result": dict}
        self._handler_registered = {}  # session_id -> True once a handler is registered

        # TTL clean-up of request bookkeeping
        self._request_timestamps = {}  # trace_id -> creation time (time.time())
        self._stale_request_ttl = 1200  # 20 minutes
        self._cleanup_interval = 60  # the TTL task checks every 60 seconds
        self._cleanup_task = None  # background asyncio.Task, if started
        self._cleanup_running = False  # whether the background task should keep running

    async def get_session(
        self,
        api_key: str,
        agent_id: "AgentID",
        target_aid: Optional[str] = None,
        session_name: str = "claude_proxy",
        session_subject: str = "claude_proxy",
        session_scope_key: Optional[str] = None,
    ) -> Optional[str]:
        """
        Return the session bound to a key, creating one when necessary.

        Args:
            api_key: API key; used as the session key when no scope key is given.
            agent_id: object providing create_session / invite_member /
                close_session and the message-handler methods.
            target_aid: member to invite; when falsy the name comes from
                ``configPresenter.get_claude_agent_name``.
            session_name: name passed to ``create_session``.
            session_subject: subject passed to ``create_session``.
            session_scope_key: overrides ``api_key`` as the session key.

        Returns:
            The session id, or ``None`` when ``create_session`` returned a
            falsy value.
        """
        async with self._lock:
            current_time = time.time()
            session_key = session_scope_key or api_key

            # Periodic sweep of sessions that have been idle too long.
            if current_time - self._last_cleanup > self._session_cleanup_interval:
                await self._cleanup_expired_sessions(agent_id)
                self._last_cleanup = current_time

            if session_key in self._sessions:
                session_id = self._sessions[session_key]

                if session_id in self._session_info:
                    session_data = self._session_info[session_id]

                    if current_time - session_data['last_used'] < self._session_timeout:
                        session_data['last_used'] = current_time

                        # Make sure a handler is registered for this session.
                        if session_id not in self._handler_registered:
                            await self._setup_session_handler(session_id, agent_id)

                        # Re-invite the member on reuse in case it has left.
                        invite_aid = session_data.get('target_aid')
                        if invite_aid:
                            try:
                                await asyncio.to_thread(
                                    agent_id.invite_member, session_id, invite_aid
                                )
                                if invite_aid == "openclaw.aid.pub":
                                    print(
                                        f"[AsyncSessionManager][OpenClaw] 复用 session 邀请成功: "
                                        f"session_id={session_id}, target={invite_aid}"
                                    )
                            except Exception:
                                # Best effort: a failed invite does not prevent
                                # reuse (the member may already be present).
                                if invite_aid == "openclaw.aid.pub":
                                    print(
                                        f"[AsyncSessionManager][OpenClaw] 复用 session 邀请失败: "
                                        f"session_id={session_id}, target={invite_aid}"
                                    )

                        return session_id
                    else:
                        # Timed out: drop it and fall through to create a new one.
                        await self._remove_session(session_key, session_id, agent_id)

            # Create a new session in a worker thread to keep the loop free.
            session_id = await asyncio.to_thread(
                agent_id.create_session, session_name, session_subject
            )

            if not session_id:
                log_error(f"[Async] 创建session失败 for scope key: {session_key}")
                return None

            claude_agent_name = target_aid or await asyncio.to_thread(configPresenter.get_claude_agent_name)
            await asyncio.to_thread(
                agent_id.invite_member, session_id, claude_agent_name
            )
            if claude_agent_name == "openclaw.aid.pub":
                print(
                    f"[AsyncSessionManager][OpenClaw] 新建 session 邀请成功: "
                    f"session_id={session_id}, target={claude_agent_name}"
                )

            self._sessions[session_key] = session_id
            self._session_info[session_id] = {
                'created_time': current_time,
                'last_used': current_time,
                'api_key': api_key,
                'session_key': session_key,
                'target_aid': claude_agent_name
            }

            await self._setup_session_handler(session_id, agent_id)

            return session_id

    async def _remove_session(self, session_key: str, session_id: str, agent_id: "AgentID"):
        """
        Close a session and forget everything recorded about it.

        A failure of ``close_session`` is logged but no longer prevents the
        local bookkeeping from being dropped; otherwise a session that cannot
        be closed would stay in the tables and be retried on every sweep.
        """
        if agent_id:
            try:
                await asyncio.to_thread(agent_id.close_session, session_id)
            except Exception as e:
                log_error(f"[Async] 移除session时出错: {str(e)}")

        try:
            # Only unmap the key if it still points at this session.
            if session_key in self._sessions and self._sessions[session_key] == session_id:
                del self._sessions[session_key]

            self._session_info.pop(session_id, None)
            self._handler_registered.pop(session_id, None)
            self._session_handlers.pop(session_id, None)
        except Exception as e:
            log_error(f"[Async] 移除session时出错: {str(e)}")

    async def _cleanup_expired_sessions(self, agent_id: "AgentID"):
        """Remove every session whose last use is older than the timeout."""
        current_time = time.time()
        # Collect first: _remove_session mutates _session_info.
        expired_sessions = [
            (session_data.get('session_key', session_data.get('api_key')), session_id)
            for session_id, session_data in self._session_info.items()
            if current_time - session_data['last_used'] > self._session_timeout
        ]

        for session_key, session_id in expired_sessions:
            await self._remove_session(session_key, session_id, agent_id)

    async def remove_session_for_api_key(self, api_key: str, agent_id: "AgentID"):
        """Remove the session stored under ``api_key``, if any."""
        await self.remove_session_by_scope_key(api_key, agent_id)

    async def remove_session_by_scope_key(self, session_scope_key: str, agent_id: "AgentID"):
        """Remove the session stored under ``session_scope_key``, if any."""
        async with self._lock:
            if session_scope_key in self._sessions:
                session_id = self._sessions[session_scope_key]
                await self._remove_session(session_scope_key, session_id, agent_id)

    async def _setup_session_handler(self, session_id: str, agent_id: "AgentID", force_reregister: bool = False):
        """
        Register the reply handler for a session.

        The handler stores each reply in ``_request_result_map`` under its
        ``trace_id`` and sets the matching event in ``_pending_requests``.

        Args:
            session_id: session to attach the handler to.
            agent_id: object providing add/remove_message_handler and
                get_content_array_from_message.
            force_reregister: register again even if already registered; the
                previously stored handler is removed first.

        Raises:
            Exception: whatever ``add_message_handler`` raised (after logging).
        """
        if session_id in self._handler_registered and not force_reregister:
            return  # already registered

        if force_reregister:
            # Remove the previously stored handler to avoid two handlers on one session.
            if session_id in self._session_handlers:
                old_handler = self._session_handlers[session_id]
                try:
                    await asyncio.to_thread(
                        agent_id.remove_message_handler, old_handler, session_id
                    )
                except Exception:
                    pass  # best effort: the old handler may no longer exist

        # Capture the loop this coroutine runs on. Events are set through
        # call_soon_threadsafe on this loop so that setting them from a
        # handler running elsewhere is safe.
        main_loop = asyncio.get_running_loop()

        def _threadsafe_set_event(event):
            """Set an asyncio.Event via the captured loop."""
            try:
                main_loop.call_soon_threadsafe(event.set)
            except RuntimeError:
                # The captured loop is closed: set directly as a fallback.
                event.set()

        async def reply_message_handler(reply_msg):
            """
            Route one incoming message to the request waiting on its trace_id.

            Every step is guarded so that a malformed message is logged and
            dropped instead of raising out of the handler.
            """
            trace_id = None
            try:
                if reply_msg is None:
                    log_error(f"[Async] Session {session_id}: 收到 None 消息,跳过处理")
                    return

                try:
                    messages = agent_id.get_content_array_from_message(reply_msg)
                except Exception as parse_err:
                    log_error(f"[Async] Session {session_id}: get_content_array_from_message 失败: {parse_err}")
                    return

                if not messages:
                    log_error(f"[Async] Session {session_id}: 收到空消息数组")
                    return

                # Only the first content entry is examined.
                try:
                    response_msg = messages[0]
                    if not isinstance(response_msg, dict):
                        log_error(f"[Async] Session {session_id}: 消息格式异常,期望dict,实际: {type(response_msg)}")
                        return
                except (IndexError, TypeError) as idx_err:
                    log_error(f"[Async] Session {session_id}: 无法获取消息内容: {idx_err}")
                    return

                # response_msg is a dict from here on.
                trace_id = response_msg.get("trace_id", "")
                msg_type = response_msg.get("type", "")

                session_data = self._session_info.get(session_id, {})
                target_aid = session_data.get("target_aid", "")
                is_openclaw_session = target_aid == "openclaw.aid.pub"
                if is_openclaw_session or (trace_id and trace_id.startswith("openclaw-")):
                    pending_exists = trace_id in self._pending_requests if trace_id else False
                    print(
                        f"[OpenClawDiag][Handler] 收到消息: session_id={session_id}, "
                        f"type={msg_type}, trace_id={trace_id or 'None'}, "
                        f"pending_exists={pending_exists}, pending_count={len(self._pending_requests)}, "
                        f"result_count={len(self._request_result_map)}"
                    )

                # An error without a trace_id is treated as a global failure.
                # NOTE: _pending_requests is not keyed by session, so the
                # broadcast reaches every pending request of this manager,
                # not only those belonging to this session.
                if not trace_id and msg_type == "error":
                    log_error(f"[Async] Session {session_id}: 收到无trace_id的错误消息,将通知所有等待请求")

                    try:
                        pending_keys_snapshot = list(self._pending_requests.keys()) if self._pending_requests else []
                        openclaw_pending_keys = [key for key in pending_keys_snapshot if str(key).startswith("openclaw-")]
                        response_keys = list(response_msg.keys())
                        print(
                            f"[OpenClawDiag][Handler] 无 trace_id 错误广播前: session_id={session_id}, "
                            f"pending_total={len(pending_keys_snapshot)}, openclaw_pending={len(openclaw_pending_keys)}, "
                            f"pending_sample={openclaw_pending_keys[:10]}, response_keys={response_keys}, "
                            f"response_msg={response_msg}"
                        )
                    except Exception as inspect_err:
                        print(f"[OpenClawDiag][Handler] 无 trace_id 错误检查失败: {inspect_err}")

                    error_result = {
                        "result_type": "error",
                        "result": response_msg
                    }

                    try:
                        pending_keys = list(self._pending_requests.keys()) if self._pending_requests else []
                        for pending_trace_id in pending_keys:
                            # Store the same error for each waiter, then wake it.
                            self._request_result_map[pending_trace_id] = error_result
                            event = self._pending_requests.get(pending_trace_id)
                            if event is not None:
                                _threadsafe_set_event(event)
                        openclaw_result_keys = [key for key in self._request_result_map.keys() if str(key).startswith("openclaw-")]
                        print(
                            f"[OpenClawDiag][Handler] 无 trace_id 错误广播完成: session_id={session_id}, "
                            f"broadcast_total={len(pending_keys)}, openclaw_result_count={len(openclaw_result_keys)}, "
                            f"openclaw_result_sample={openclaw_result_keys[:10]}"
                        )
                    except Exception as notify_err:
                        log_error(f"[Async] Session {session_id}: 通知等待请求失败: {notify_err}")

                    return

                # Any other message needs a trace_id to be routed.
                if not trace_id:
                    log_error(f"[Async] Session {session_id}: 消息缺少trace_id")
                    if is_openclaw_session:
                        msg_keys = list(response_msg.keys())
                        print(
                            f"[OpenClawDiag][Handler] 缺少 trace_id,消息键: {msg_keys}"
                        )
                    return

                result = {
                    "result_type": msg_type,
                    "result": response_msg
                }

                try:
                    self._request_result_map[trace_id] = result
                except Exception as store_err:
                    log_error(f"[Async] Session {session_id}: 存储结果失败: {store_err}")
                    # Still try to wake the waiter below.

                # Wake the request waiting on this trace_id.
                try:
                    if trace_id in self._pending_requests:
                        event = self._pending_requests.get(trace_id)
                        if event is not None:
                            _threadsafe_set_event(event)
                            if is_openclaw_session or trace_id.startswith("openclaw-"):
                                print(
                                    f"[OpenClawDiag][Handler] 已唤醒等待事件: trace_id={trace_id}, "
                                    f"pending_count={len(self._pending_requests)}, "
                                    f"result_count={len(self._request_result_map)}"
                                )
                        else:
                            log_error(f"[Async] Session {session_id}: trace_id {trace_id} 对应的 event 为 None")
                    else:
                        log_error(f"[Async] Session {session_id}: trace_id {trace_id} 没有对应的等待请求")
                        if is_openclaw_session or trace_id.startswith("openclaw-"):
                            # str() keeps this safe for non-string keys, as in
                            # the other diagnostic comprehensions above.
                            sample_pending = [k for k in self._pending_requests.keys() if str(k).startswith("openclaw-")][:10]
                            print(
                                f"[OpenClawDiag][Handler] trace_id 未匹配到等待请求: trace_id={trace_id}, "
                                f"openclaw_pending_sample={sample_pending}"
                            )
                except Exception as notify_err:
                    log_error(f"[Async] Session {session_id}: 通知等待请求失败: {notify_err}")

            except Exception as e:
                # Outermost guard: nothing may escape the handler.
                try:
                    log_error(f"[Async] Session {session_id}: 消息处理失败: {str(e)}")

                    # If a trace_id was already extracted, report the failure
                    # to its waiter instead of leaving it waiting.
                    if trace_id and trace_id in self._pending_requests:
                        try:
                            error_result = {
                                "result_type": "error",
                                "result": {"content": f"消息解析失败: {str(e)}", "http_status": 500}
                            }
                            self._request_result_map[trace_id] = error_result
                            event = self._pending_requests.get(trace_id)
                            if event is not None:
                                _threadsafe_set_event(event)
                        except Exception as error_notify_err:
                            log_error(f"[Async] Session {session_id}: 设置错误事件失败: {error_notify_err}")
                except Exception as final_err:
                    log_error(f"[Async] Session {session_id}: 严重错误 - 异常处理失败: {final_err}")

        # add_message_handler is called synchronously, so run it in a worker thread.
        try:
            await asyncio.to_thread(
                agent_id.add_message_handler, reply_message_handler, session_id=session_id
            )
        except Exception as e:
            log_error(f"[Async] Session {session_id}: Handler注册失败: {e}")
            raise

        # Keep the reference so the handler can be removed later.
        self._session_handlers[session_id] = reply_message_handler

        self._handler_registered[session_id] = True

    def get_session_info(self):
        """Return counters and a shallow copy of the session table (debug aid)."""
        return {
            'active_sessions': len(self._sessions),
            'session_details': dict(self._session_info),
            'pending_requests': len(self._pending_requests),
            'registered_handlers': len(self._handler_registered),
            'tracked_timestamps': len(self._request_timestamps),
            'cleanup_task_running': self._cleanup_running
        }

    # ==================== TTL clean-up ====================

    def register_request_timestamp(self, trace_id: str):
        """
        Record the creation time of a request.

        Args:
            trace_id: unique identifier of the request.
        """
        self._request_timestamps[trace_id] = time.time()
        log_info(f"[TTL] 注册请求时间戳: trace_id={trace_id}")

    def unregister_request_timestamp(self, trace_id: str):
        """
        Forget the creation time of a request; unknown ids are ignored.

        Args:
            trace_id: unique identifier of the request.
        """
        self._request_timestamps.pop(trace_id, None)

    async def start_cleanup_task(self):
        """
        Start the background task that periodically drops stale requests.

        Does nothing when the task is already marked as running.
        """
        if self._cleanup_running:
            print("[TTL] 清理任务已在运行中,跳过启动")
            return

        self._cleanup_running = True
        print(f"[TTL] 🚀 启动后台清理任务,TTL={self._stale_request_ttl}秒,检查间隔={self._cleanup_interval}秒")
        log_info(f"[TTL] 启动后台清理任务,TTL={self._stale_request_ttl}秒")

        async def cleanup_loop():
            """Sleep for one interval, then sweep; repeat while running."""
            while self._cleanup_running:
                try:
                    await asyncio.sleep(self._cleanup_interval)
                    if self._cleanup_running:  # may have been stopped during the sleep
                        await self._cleanup_stale_requests()
                except asyncio.CancelledError:
                    print("[TTL] 清理任务被取消")
                    break
                except Exception as e:
                    # Log and keep looping; one failed sweep must not end the task.
                    log_error(f"[TTL] 清理任务异常: {e}")
                    print(f"[TTL] ❌ 清理任务异常: {e}")
                    import traceback
                    traceback.print_exc()

        self._cleanup_task = asyncio.create_task(cleanup_loop())

    async def stop_cleanup_task(self):
        """Stop the background clean-up task and wait for it to finish."""
        if not self._cleanup_running:
            return

        print("[TTL] 🛑 停止后台清理任务...")
        self._cleanup_running = False

        if self._cleanup_task:
            self._cleanup_task.cancel()
            try:
                await self._cleanup_task
            except asyncio.CancelledError:
                pass
            self._cleanup_task = None

        print("[TTL] ✅ 后台清理任务已停止")
        log_info("[TTL] 后台清理任务已停止")

    async def _cleanup_stale_requests(self):
        """
        Drop bookkeeping for requests older than ``_stale_request_ttl``.

        For every stale trace_id the pending event, the stored result and the
        timestamp are removed. Pending events are deleted without being set.
        """
        current_time = time.time()

        # 1. Collect stale ids from a snapshot of the timestamp table.
        stale_trace_ids = [
            (trace_id, current_time - created_time)
            for trace_id, created_time in list(self._request_timestamps.items())
            if current_time - created_time > self._stale_request_ttl
        ]

        if not stale_trace_ids:
            return  # nothing stale: stay silent

        # 2. Remove them.
        print(f"[TTL] 🧹 发现 {len(stale_trace_ids)} 个过期请求,开始清理...")
        log_info(f"[TTL] 发现 {len(stale_trace_ids)} 个过期请求")

        for trace_id, age_seconds in stale_trace_ids:
            try:
                age_minutes = age_seconds / 60

                if trace_id in self._pending_requests:
                    del self._pending_requests[trace_id]
                    print(f"[TTL] - 清理 _pending_requests: {trace_id} (已存活 {age_minutes:.1f} 分钟)")

                if trace_id in self._request_result_map:
                    del self._request_result_map[trace_id]
                    print(f"[TTL] - 清理 _request_result_map: {trace_id}")

                if trace_id in self._request_timestamps:
                    del self._request_timestamps[trace_id]

                log_info(f"[TTL] 已清理过期请求: trace_id={trace_id}, 存活时间={age_minutes:.1f}分钟")

            except Exception as e:
                log_error(f"[TTL] 清理 trace_id={trace_id} 时出错: {e}")
                print(f"[TTL] ❌ 清理 {trace_id} 失败: {e}")

        print(f"[TTL] ✅ 清理完成,共清理 {len(stale_trace_ids)} 个过期请求")
        print(f"[TTL] 当前状态: pending={len(self._pending_requests)}, results={len(self._request_result_map)}, timestamps={len(self._request_timestamps)}")

    async def reregister_all_handlers(self, agent_id: "AgentID"):
        """
        Register a fresh handler for every known session.

        Meant for reconnects: the session tables are kept, only the
        registration marks are cleared and each handler is registered again
        with ``force_reregister=True``. A failure for one session is printed
        and does not stop the others.
        """
        print(f"[AsyncSessionManager] 🔄 开始重新注册所有 handlers,实例ID: {self._instance_id}")

        session_ids = list(self._session_info.keys())

        if not session_ids:
            print("[AsyncSessionManager] 没有需要重新注册的 session")
            return

        print(f"[AsyncSessionManager] 找到 {len(session_ids)} 个 session 需要重新注册 handlers")

        # Only the registration marks are reset; _sessions and _session_info stay.
        self._handler_registered.clear()
        print("[AsyncSessionManager] 已清空 handler 注册标记")

        for session_id in session_ids:
            try:
                print(f"[AsyncSessionManager] 正在为 session {session_id} 重新注册 handler...")
                await self._setup_session_handler(session_id, agent_id, force_reregister=True)
                print(f"[AsyncSessionManager] ✅ session {session_id} handler 重新注册成功")
            except Exception as e:
                print(f"[AsyncSessionManager] ❌ session {session_id} handler 重新注册失败: {e}")
                import traceback
                traceback.print_exc()

        print("[AsyncSessionManager] ✅ 所有 handlers 重新注册完成")
        print(f"[AsyncSessionManager] 当前注册的 handlers: {len(self._handler_registered)}")
        print(f"[AsyncSessionManager] Handler 列表: {list(self._handler_registered.keys())}")

    async def cleanup(self, agent_id: Optional["AgentID"] = None):
        """
        Stop the background task, remove all handlers and clear all state.

        Args:
            agent_id: when given, every stored handler is removed from it
                before the internal tables are cleared.

        Exceptions are printed with a traceback and not re-raised.
        """
        print(f"[AsyncSessionManager] 🧹 开始清理实例,ID: {self._instance_id}")

        try:
            # 0. Stop the background clean-up task.
            await self.stop_cleanup_task()

            # 1. Remove every registered handler.
            if agent_id and self._session_handlers:
                print(f"[AsyncSessionManager] 正在移除 {len(self._session_handlers)} 个 handlers...")

                for session_id, handler in list(self._session_handlers.items()):
                    try:
                        await asyncio.to_thread(
                            agent_id.remove_message_handler, handler, session_id=session_id
                        )
                        print(f"[AsyncSessionManager] ✓ 已移除 session {session_id} 的 handler")
                    except Exception as e:
                        print(f"[AsyncSessionManager] ⚠️ 移除 handler 失败 (session {session_id}): {e}")

                print("[AsyncSessionManager] ✓ 所有 handlers 已移除")

            # 2. Clear all internal state.
            self._sessions.clear()
            self._session_info.clear()
            self._session_handlers.clear()
            self._handler_registered.clear()
            self._pending_requests.clear()
            self._request_result_map.clear()
            self._request_timestamps.clear()

            print("[AsyncSessionManager] ✓ 所有内部状态已清空")
            print(f"[AsyncSessionManager] ✅ 清理完成,实例ID: {self._instance_id}")

        except Exception as e:
            print(f"[AsyncSessionManager] ❌ 清理失败: {e}")
            import traceback
            traceback.print_exc()
1950
+
1951
+
1952
# Deferred initialisation: no asyncio resources are created at import time.
# The instance is created on first use of get_async_session_manager().
async_session_manager: AsyncSessionManager = None
_manager_lock = None  # asyncio.Lock guarding creation/reset of the singleton


async def get_async_session_manager(agent_id: AgentID = None) -> AsyncSessionManager:
    """
    Return the AsyncSessionManager singleton, creating it on first use.

    Creation and reuse both happen while holding an asyncio lock that is
    itself created lazily on the first call. A newly created instance has
    its TTL clean-up task started; a reused instance has the task restarted
    if it is not marked as running.

    Args:
        agent_id: accepted for interface compatibility; this function does
            not use it.

    Returns:
        The AsyncSessionManager singleton.
    """
    global async_session_manager, _manager_lock

    # The lock is created lazily, on the first call.
    if _manager_lock is None:
        _manager_lock = asyncio.Lock()

    async with _manager_lock:
        if async_session_manager is not None:
            print(f"[get_async_session_manager] 复用现有实例,ID: {async_session_manager._instance_id}")
            # Make sure the clean-up task is alive on the reused instance.
            if not async_session_manager._cleanup_running:
                print("[get_async_session_manager] 清理任务未运行,重新启动...")
                await async_session_manager.start_cleanup_task()
        else:
            print("[get_async_session_manager] 创建新的 AsyncSessionManager 实例")
            async_session_manager = AsyncSessionManager()
            # A fresh instance always gets its TTL clean-up task started.
            await async_session_manager.start_cleanup_task()

    return async_session_manager
1994
+
1995
+
1996
async def reset_async_session_manager(agent_id: AgentID = None):
    """
    Replace the AsyncSessionManager singleton with a fresh instance.

    Under the module lock: the current instance (if any) is cleaned up via
    ``cleanup(agent_id)`` — a failure there is printed and ignored — then a
    new instance is created and its TTL clean-up task is started.

    Args:
        agent_id: passed to the old instance's ``cleanup`` so that its
            handlers can be removed.

    Returns:
        The newly created AsyncSessionManager.
    """
    global async_session_manager, _manager_lock

    # The lock is created lazily, on the first call.
    if _manager_lock is None:
        _manager_lock = asyncio.Lock()

    async with _manager_lock:
        # 1. Tear down the previous instance (this also stops its clean-up task).
        previous = async_session_manager
        if previous is not None:
            old_instance_id = previous._instance_id
            print(f"[reset_async_session_manager] 清理旧实例,ID: {old_instance_id}")

            try:
                await previous.cleanup(agent_id)
            except Exception as e:
                print(f"[reset_async_session_manager] ⚠️ 清理旧实例失败: {e}")
            else:
                print(f"[reset_async_session_manager] ✓ 旧实例已清理")

        # 2. Create the replacement.
        print("[reset_async_session_manager] 创建新的 AsyncSessionManager 实例")
        async_session_manager = AsyncSessionManager()
        print(f"[reset_async_session_manager] ✓ 新实例已创建,ID: {async_session_manager._instance_id}")

        # 3. Start its TTL clean-up task.
        await async_session_manager.start_cleanup_task()

    return async_session_manager
2035
+
2036
+
2037
def get_claude_proxy_config():
    """
    Build the ProxyConfig used for the Claude proxy.

    Request and response bodies pass through unchanged, every model name is
    accepted, streaming is delegated to ``handle_stream_response`` and errors
    are rendered as an HTTP 500 JSON body.

    Returns:
        ProxyConfig: configuration with ``proxy_type="claude"``.
    """
    from .proxy_config import ProxyConfig

    def _target_aid(api_key):
        # Read the current agent once. The previous lambda called
        # get_current_agent_id() twice (check, then ``.aid``), so a change
        # between the two calls could raise AttributeError on None.
        agent = get_current_agent_id()
        return agent.aid if agent else ""

    return ProxyConfig(
        target_aid_getter=_target_aid,
        request_converter=lambda body: body,
        response_converter=lambda resp: resp,
        stream_handler=handle_stream_response,
        error_formatter=lambda e: JSONResponse(
            status_code=500,
            content={"error": {"message": str(e), "type": "internal_error"}}
        ),
        model_validator=lambda m: True,
        proxy_type="claude"
    )
2053
+
2054
+
2055
+ async def proxy_claude_request(
2056
+ request: Request,
2057
+ x_api_key: Optional[str] = None
2058
+ ):
2059
+ """
2060
+ Claude代理请求处理器(异步版本)
2061
+ 支持所有HTTP方法和路径
2062
+ """
2063
+ global agentId
2064
+
2065
+ # ✅ P0修复:检查 AsyncSessionManager 是否已初始化
2066
+ if async_session_manager is None:
2067
+ raise HTTPException(
2068
+ status_code=503,
2069
+ detail="AsyncSessionManager 未初始化,请稍后重试"
2070
+ )
2071
+
2072
+ # 获取代理日志记录器(重用 FileHandler,避免每次请求都创建)
2073
+ proxy_logger = get_proxy_logger()
2074
+
2075
+ # 积分预扣除追踪变量
2076
+ reserved_credits = 0 # 已预扣除的积分
2077
+ user_id = None # 用户ID
2078
+ request_id = str(uuid.uuid4()) # 请求ID
2079
+ request_success = False # 请求是否成功
2080
+
2081
+ # ✅ 性能监控:记录关键步骤的耗时
2082
+ perf_start = time.time()
2083
+ perf_timings = {} # 存储各个步骤的耗时
2084
+
2085
+ try:
2086
+ request_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
2087
+ client_ip = request.client.host if request.client else "unknown"
2088
+
2089
+ # 记录请求开始
2090
+ proxy_logger.info("=" * 80)
2091
+ proxy_logger.info(f"Claude代理请求开始 - {request.method} {request.url.path}")
2092
+ proxy_logger.info(f"客户端IP: {client_ip}")
2093
+ proxy_logger.info(f"请求时间: {request_time}")
2094
+ proxy_logger.info(f"追踪ID: {request_id}")
2095
+
2096
+ log_info(f"\n{'='*80}")
2097
+ log_info(f"[{request_time}] [Async] 收到 {request.method} 请求: {request.url.path}")
2098
+ headers_dict = {}
2099
+ skip_headers = {'host', 'content-length'}
2100
+ for key, value in request.headers.items():
2101
+ if key.lower() not in skip_headers:
2102
+ headers_dict[key] = value
2103
+ # 1. API Key验证
2104
+ if not x_api_key:
2105
+ x_api_key = request.headers.get("x-api-key")
2106
+
2107
+ if not x_api_key:
2108
+ x_api_key = request.headers.get("authorization")
2109
+ if x_api_key and x_api_key.startswith("Bearer "):
2110
+ x_api_key = x_api_key[7:]
2111
+
2112
+ if not x_api_key:
2113
+ x_api_key = request.headers.get("authorization")
2114
+ if x_api_key and x_api_key.startswith("Bearer "):
2115
+ x_api_key = x_api_key[7:]
2116
+
2117
+ if not x_api_key:
2118
+ x_api_key = headers_dict["x-goog-api-key"]
2119
+
2120
+ # if not x_api_key:
2121
+ # error_msg = "请求缺少 x-api-key 头"
2122
+ # proxy_logger.error(f"API Key验证失败: {error_msg}")
2123
+ # log_error("[Async] 请求缺少 x-api-key 头")
2124
+ # raise HTTPException(status_code=401, detail="Missing x-api-key header")
2125
+
2126
+ proxy_logger.info(f"API Key验证: {x_api_key[:20]}... (已脱敏)")
2127
+
2128
+ # 1.5. API Key 校验
2129
+ # try:
2130
+ # proxy_logger.info("开始API Key校验")
2131
+ # validation_result = await api_key_manager.validate_api_key(x_api_key)
2132
+
2133
+ # if not validation_result["valid"]:
2134
+ # error_msg = validation_result.get("error", "API Key 校验不通过")
2135
+ # cache_status = "缓存" if validation_result.get("from_cache") else "实时"
2136
+ # proxy_logger.error(f"API Key校验失败 ({cache_status}): {error_msg}")
2137
+ # log_error(f"[Async] API Key 校验失败 ({cache_status}): {error_msg}")
2138
+ # raise HTTPException(
2139
+ # status_code=401,
2140
+ # detail=f"API Key 校验失败: {error_msg}"
2141
+ # )
2142
+
2143
+ # cache_status = "缓存" if validation_result.get("from_cache") else "实时"
2144
+ # proxy_logger.info(f"API Key校验通过 ({cache_status})")
2145
+ # log_info(f"[Async] API Key 校验通过 ({cache_status}): {x_api_key[:20]}...")
2146
+
2147
+ # except HTTPException:
2148
+ # raise
2149
+ # except Exception as e:
2150
+ # proxy_logger.error(f"API Key校验异常: {str(e)}")
2151
+ # proxy_logger.error(f"异常堆栈:\n{traceback.format_exc()}")
2152
+ # log_error(f"[Async] API Key 校验异常: {str(e)}")
2153
+ # raise HTTPException(
2154
+ # status_code=500,
2155
+ # detail=f"API Key 校验服务异常: {str(e)}"
2156
+ # )
2157
+
2158
+ # 2. 积分余额检查 + 预扣除(原子操作,防止并发透支)
2159
+ try:
2160
+ step_start = time.time()
2161
+ proxy_logger.info("开始积分余额检查(智能混合策略 + 积分锁)")
2162
+ print(f"[Async] 开始积分余额检查(智能混合策略 + 积分锁)")
2163
+
2164
+ # 2.1 获取用户ID(使用 asyncio.to_thread 避免阻塞事件循环)
2165
+ user_info = await asyncio.to_thread(userPresenter.get_user_info)
2166
+ perf_timings['get_user_info'] = (time.time() - step_start) * 1000 # 毫秒
2167
+ if user_info.get("status") != "success":
2168
+ raise HTTPException(status_code=401, detail="无法获取用户信息")
2169
+
2170
+ user_id = user_info["user_info"].get("user_id")
2171
+ if not user_id:
2172
+ raise HTTPException(status_code=401, detail="用户ID无效")
2173
+
2174
+ # 2.2 使用积分锁进行原子性的检查+预扣除
2175
+ step_start = time.time()
2176
+
2177
+ required_credits = 20 # 每次请求需要的积分
2178
+
2179
+ # 原子操作:检查余额并预扣除
2180
+ reserve_result = await credits_lock_manager.check_and_reserve_credits(
2181
+ user_id=user_id,
2182
+ required_amount=required_credits,
2183
+ balance_fetcher=None, # 使用默认的余额获取方法
2184
+ request_id=request_id
2185
+ )
2186
+ perf_timings['credits_reserve'] = (time.time() - step_start) * 1000 # 毫秒
2187
+ if not reserve_result["success"]:
2188
+ reserve_result["success"] = True
2189
+ reserve_result["balance"] = 10000
2190
+ reserve_result['available'] = 10000
2191
+
2192
+ if not reserve_result["success"]:
2193
+ error_msg = reserve_result.get("error", "积分检查失败")
2194
+ balance = reserve_result.get("balance", 0)
2195
+ available = reserve_result.get("available", 0)
2196
+
2197
+ proxy_logger.error(f"❌ 积分余额检查失败: {error_msg}")
2198
+ proxy_logger.error(f" 余额: {balance}, 已预扣: {reserve_result.get('reserved', 0)}, 可用: {available}")
2199
+ print(f"[Async] ❌ 积分余额检查失败: {error_msg}")
2200
+ print(f"[Async] 余额: {balance}, 可用: {available}")
2201
+
2202
+ # 根据错误类型返回不同的错误码
2203
+ if "获取余额失败" in error_msg or "API返回错误" in error_msg:
2204
+ raise HTTPException(
2205
+ status_code=503,
2206
+ detail=f"积分余额服务不可用,请稍后重试: {error_msg}"
2207
+ )
2208
+ elif balance == 0:
2209
+ raise HTTPException(
2210
+ status_code=503,
2211
+ detail=f"积分余额为0,无法继续使用"
2212
+ )
2213
+ elif "不足" in error_msg:
2214
+ raise HTTPException(
2215
+ status_code=503,
2216
+ detail=f"积分不足: 余额={balance}, 可用={available}, 需要={required_credits}"
2217
+ )
2218
+ else:
2219
+ raise HTTPException(
2220
+ status_code=503,
2221
+ detail=f"积分余额检查失败: {error_msg}"
2222
+ )
2223
+
2224
+ # 预扣除成功,记录状态
2225
+ reserved_credits = required_credits
2226
+ balance = reserve_result.get("balance", 0)
2227
+ available = reserve_result.get("available", 0)
2228
+
2229
+ proxy_logger.info(f"✅ 积分预扣除成功")
2230
+ proxy_logger.info(f" 余额: {balance}, 预扣: {required_credits}, 剩余可用: {available}")
2231
+ print(f"[Async] ✅ 积分预扣除成功: 余额={balance}, 剩余可用={available}")
2232
+
2233
+ except HTTPException:
2234
+ # 直接重新抛出 HTTPException
2235
+ raise
2236
+ except Exception as e:
2237
+ # 任何未预期的异常都严格拒绝请求
2238
+ proxy_logger.error(f"❌ 积分余额检查异常: {str(e)}")
2239
+ import traceback
2240
+ error_traceback = traceback.format_exc()
2241
+ proxy_logger.error(f"异常堆栈:\n{error_traceback}")
2242
+ log_error(f"[Async] 积分余额检查异常: {str(e)}")
2243
+ print(f"[Async] ❌ 积分余额检查异常: {str(e)}")
2244
+ traceback.print_exc()
2245
+
2246
+ # 异常情况严格拒绝请求
2247
+ raise HTTPException(
2248
+ status_code=503,
2249
+ detail=f"积分余额检查服务异常,请稍后重试: {str(e)}"
2250
+ )
2251
+
2252
+ # 2. 读取请求体
2253
+ step_start = time.time()
2254
+ proxy_logger.info("读取请求体")
2255
+ request_body = await request.body()
2256
+ perf_timings['read_body'] = (time.time() - step_start) * 1000
2257
+ proxy_logger.info(f"请求体大小: {len(request_body)} bytes")
2258
+
2259
+ # 3. 构建代理消息
2260
+ step_start = time.time()
2261
+ proxy_logger.info("构建代理消息")
2262
+
2263
+ trace_id = str(uuid.uuid4())
2264
+ # 使用 asyncio.to_thread 避免 JSON 解析阻塞事件循环(特别是大请求体)
2265
+ bodyjson = await asyncio.to_thread(json.loads, request_body.decode('utf-8'))
2266
+ perf_timings['json_parse'] = (time.time() - step_start) * 1000
2267
+
2268
+ # 根据请求路径校验模型名称
2269
+ request_path = str(request.url.path)
2270
+ model_name = bodyjson.get("model", "")
2271
+
2272
+ if "/claude-proxy/" in request_path:
2273
+ # Claude Proxy: 模型必须包含 "claude"
2274
+ if model_name and "claude" not in model_name.lower():
2275
+ proxy_logger.error(f"Claude Proxy 模型校验失败: {model_name}")
2276
+ raise HTTPException(
2277
+ status_code=400,
2278
+ detail=f"不支持该模型 {model_name},请使用 Claude CLI 内置模型"
2279
+ )
2280
+ elif "/codex-proxy/" in request_path:
2281
+ # Codex Proxy: 模型必须包含 "gpt"
2282
+ if model_name and "gpt" not in model_name.lower():
2283
+ proxy_logger.error(f"Codex Proxy 模型校验失败: {model_name}")
2284
+ raise HTTPException(
2285
+ status_code=400,
2286
+ detail=f"不支持该模型 {model_name},请使用 Codex CLI 内置模型"
2287
+ )
2288
+ elif "/gemini-proxy/" in request_path:
2289
+ # Gemini Proxy: 模型必须包含 "gemini"
2290
+ if model_name and "gemini" not in model_name.lower():
2291
+ proxy_logger.error(f"Gemini Proxy 模型校验失败: {model_name}")
2292
+ raise HTTPException(
2293
+ status_code=400,
2294
+ detail=f"不支持该模型 {model_name},请使用 Gemini CLI 内置模型"
2295
+ )
2296
+
2297
+ proxy_message = {
2298
+ "type": "claude_proxy",
2299
+ "status": "success",
2300
+ "timestamp": int(time.time() * 1000),
2301
+ "trace_id": trace_id,
2302
+ "content": {
2303
+ "path": str(request.url.path).replace("/claude-proxy", ""), # 移除前缀
2304
+ "method": request.method,
2305
+ "headers": headers_dict,
2306
+ "body": bodyjson,
2307
+ "source":"evol",
2308
+ "version":__version__,
2309
+ "cmp_version":__cmp_version__
2310
+ }
2311
+ }
2312
+
2313
+
2314
+ print(f"[Async] 🔍 构建代理消息:")
2315
+ print(f"[Async] - Trace ID: {trace_id}")
2316
+ print(f"[Async] - Message Type: {proxy_message['type']}")
2317
+ print(f"[Async] - 验证 trace_id 在消息中: {proxy_message.get('trace_id', 'NOT_FOUND')}")
2318
+
2319
+ proxy_logger.info(f"代理消息构建完成,trace_id: {trace_id}")
2320
+ proxy_logger.info(f"请求路径: {request.url.path}")
2321
+ proxy_logger.info(f"请求方法: {request.method}")
2322
+ proxy_logger.info(f"请求头数量: {len(headers_dict)}")
2323
+
2324
+ log_info(f"[Async] 发送Claude代理请求: {request.url.path}, trace_id: {trace_id}")
2325
+
2326
+ # 4. 发送消息到Claude Agent
2327
+ # 4.1 检查用户是否登录
2328
+ if not userPresenter.is_logged_in():
2329
+ proxy_logger.error("用户未登录")
2330
+ log_error("[Async] 用户未登录,请打开 Evol 登录")
2331
+ raise HTTPException(
2332
+ status_code=401,
2333
+ detail="用户未登录,请打开 Evol 登录"
2334
+ )
2335
+
2336
+ # 4.2 检查 AgentID 是否在线,如未在线则执行完全重建
2337
+ if agentId is None or not agentId.is_online_success:
2338
+ proxy_logger.warning("AgentID 未连接,执行完全重建...")
2339
+ print("[Async] ⚠️ AgentID 未连接,执行完全重建...")
2340
+
2341
+ # ✅ 检查冷却时间(30分钟内只执行一次)
2342
+ time_since_last = time.time() - _last_full_agentcp_rebuild_time
2343
+ if time_since_last < _full_agentcp_rebuild_cooldown:
2344
+ remaining = (_full_agentcp_rebuild_cooldown - time_since_last) / 60
2345
+ proxy_logger.error(f"完全重建冷却中,剩余 {remaining:.1f} 分钟")
2346
+ print(f"[Async] ⏳ 完全重建冷却中,剩余 {remaining:.1f} 分钟")
2347
+ raise HTTPException(
2348
+ status_code=503,
2349
+ detail=f"连接异常,系统正在恢复中,请 {int(remaining)+1} 分钟后重试,或重启Evol"
2350
+ )
2351
+
2352
+ # ✅ 在线程中执行完全重建(避免阻塞事件循环)
2353
+ rebuild_success = await asyncio.to_thread(_full_rebuild_agentcp_system)
2354
+
2355
+ if not rebuild_success:
2356
+ proxy_logger.error("AgentID 完全重建失败")
2357
+ log_error("[Async] AgentID 完全重建失败")
2358
+ raise HTTPException(
2359
+ status_code=503,
2360
+ detail="Service Unavailable: 连接恢复失败,请尝试重启Evol"
2361
+ )
2362
+
2363
+ # 重建成功,检查 agentId 状态
2364
+ if agentId is None or not agentId.is_online_success:
2365
+ proxy_logger.error("AgentID 重建后仍未在线")
2366
+ log_error("[Async] AgentID 重建后仍未在线")
2367
+ raise HTTPException(
2368
+ status_code=503,
2369
+ detail="Service Unavailable: 连接恢复异常,请尝试重启Evol"
2370
+ )
2371
+
2372
+ proxy_logger.info("AgentID 完全重建成功,继续处理请求")
2373
+ print("[Async] ✅ AgentID 完全重建成功,继续处理请求")
2374
+
2375
+ # 4.3 AgentID 在线,开始处理请求
2376
+ if agentId and agentId.is_online_success:
2377
+ proxy_logger.info("Agent在线,开始处理请求")
2378
+
2379
+ # ✅ 优化:使用循环实现 session 创建失败时的重试逻辑
2380
+ session_id = None
2381
+ max_session_retries = 2
2382
+
2383
+ for session_attempt in range(max_session_retries):
2384
+ step_start = time.time()
2385
+ proxy_logger.info(f"获取或创建session (尝试 {session_attempt + 1}/{max_session_retries})")
2386
+ session_id = await async_session_manager.get_session(x_api_key, agentId)
2387
+ perf_timings['get_session'] = (time.time() - step_start) * 1000
2388
+
2389
+ if session_id:
2390
+ # 成功获取 session
2391
+ break
2392
+ elif session_attempt < max_session_retries - 1:
2393
+ # 第一次失败,尝试 agentId.online() 重新上线后重试
2394
+ proxy_logger.warning("Session创建失败,尝试重新上线...")
2395
+ print(f"[Async] ⚠️ Session 创建失败 (尝试 {session_attempt + 1}),执行 agentId.online() 重试...")
2396
+
2397
+ try:
2398
+ # 清空 session 缓存
2399
+ _clear_async_session_manager_cache()
2400
+ print("[Async] ✓ 已清空 session 缓存")
2401
+
2402
+ # 调用 agentId.online() 重新上线
2403
+ await asyncio.to_thread(agentId.online)
2404
+ print(f"[Async] ✓ agentId.online() 完成,is_online_success: {agentId.is_online_success}")
2405
+
2406
+ if agentId.is_online_success:
2407
+ # 【重要】AgentID 上线后统一入口 - 初始化监听器等
2408
+ from ..presenter.agentIdPresenter import evol_agentId_online
2409
+ evol_agentId_online(agentId)
2410
+
2411
+ # 重新注册断开回调
2412
+ _register_disconnect_callback(agentId, disable_auto_reconnect=True)
2413
+ print("[Async] 🔄 准备重试获取 session...")
2414
+ else:
2415
+ proxy_logger.error("agentId.online() 后仍未上线")
2416
+ print("[Async] ❌ agentId.online() 后仍未上线,放弃重试")
2417
+ break
2418
+ except Exception as retry_error:
2419
+ proxy_logger.error(f"重新上线过程异常: {str(retry_error)}")
2420
+ print(f"[Async] ❌ 重新上线异常: {retry_error}")
2421
+ break
2422
+
2423
+ if session_id:
2424
+ proxy_logger.info(f"Session获取成功: {session_id}")
2425
+ print(f"[Request] AsyncSessionManager 实例ID: {async_session_manager._instance_id}")
2426
+ print(f"[Request] 准备添加 trace_id: {trace_id} 到 _pending_requests")
2427
+
2428
+ # ✅ 使用trace_id匹配机制(参考llm_agent_utils.py第244-252行)
2429
+ response_event = asyncio.Event()
2430
+ async_session_manager._pending_requests[trace_id] = response_event
2431
+ # ✅ 注册时间戳(用于 TTL 自动清理)
2432
+ async_session_manager.register_request_timestamp(trace_id)
2433
+ print(f"[Request] ✅ 已添加 trace_id: {trace_id} 到 _pending_requests")
2434
+
2435
+ try:
2436
+ # 发送消息(在线程中执行AgentCP操作)
2437
+ # 使用 asyncio.to_thread 避免阻塞事件循环
2438
+ claude_agent_name = await asyncio.to_thread(configPresenter.get_claude_agent_name)
2439
+ proxy_logger.info(f"发送消息到Claude Agent:{claude_agent_name}")
2440
+ print(f"[Async] 📤 准备发送消息:")
2441
+ print(f"[Async] - Session ID: {session_id}")
2442
+ print(f"[Async] - Agent Name: {claude_agent_name}")
2443
+ print(f"[Async] - Trace ID: {trace_id}")
2444
+ print(f"[Async] - Message Type: {proxy_message.get('type', 'N/A')}")
2445
+
2446
+ step_start = time.time()
2447
+ await asyncio.to_thread(
2448
+ agentId.send_message, session_id, [claude_agent_name], proxy_message
2449
+ )
2450
+ perf_timings['send_message'] = (time.time() - step_start) * 1000
2451
+
2452
+ proxy_logger.info("等待Claude Agent响应")
2453
+ print(f"[Async] ⏳ 等待Claude Agent响应, trace_id: {trace_id}")
2454
+
2455
+ wait_start = time.time()
2456
+
2457
+ # ✅ 优化超时:100秒超时(避免客户端100秒超时)
2458
+ # 减少超时时间可以:
2459
+ # 1. 更快地释放协程资源
2460
+ # 2. 减少客户端等待时间
2461
+ # 3. 避免大量协程长时间占用
2462
+ try:
2463
+ await asyncio.wait_for(response_event.wait(), timeout=300)
2464
+ perf_timings['wait_response'] = (time.time() - wait_start) * 1000
2465
+ proxy_logger.info(f"在100秒内收到响应,等待时间: {perf_timings['wait_response']:.2f}ms")
2466
+ # 成功收到响应,重置连续无响应计数
2467
+ _reset_no_response_count()
2468
+ except asyncio.TimeoutError:
2469
+ # 超时处理:增加连续无响应计数
2470
+ no_response_count = _increment_no_response_count()
2471
+ proxy_logger.warning(f"请求超时,trace_id: {trace_id},连续无响应: {no_response_count}/{_consecutive_no_response_threshold}")
2472
+ log_info(f"[Async] 请求超时,trace_id: {trace_id},连续无响应: {no_response_count}/{_consecutive_no_response_threshold}")
2473
+
2474
+ # 只有连续无响应达到阈值时才触发重建
2475
+ if _should_trigger_rebuild():
2476
+ print(f"[Async] ⚠️ 连续 {no_response_count} 次无响应,触发自动重建...")
2477
+ print(f"[Async] 🔧 第1步:执行强制重建 AgentCP 体系...")
2478
+
2479
+ try:
2480
+ # 同步等待重建完成
2481
+ rebuild_result = await force_rebuild_agentcp_system(bypass_cooldown=True)
2482
+
2483
+ # 重建后重置计数器
2484
+ _reset_no_response_count()
2485
+
2486
+ if rebuild_result["success"]:
2487
+ print(f"[Async] ✅ 强制重建成功: {rebuild_result['agent_id']}")
2488
+ print(f"[Async] 🔧 第2步:自动重试请求...")
2489
+
2490
+ # 重建成功,自动重试请求
2491
+ # 需要重新获取全局 agentId(因为重建后已更新)
2492
+ if agentId is None or not agentId.is_online_success:
2493
+ print(f"[Async] ❌ 重建后 AgentID 仍然离线")
2494
+ raise HTTPException(
2495
+ status_code=503,
2496
+ detail="连接重建后仍然离线,请稍后重试"
2497
+ )
2498
+
2499
+ # 生成新的 trace_id 用于重试
2500
+ retry_trace_id = f"retry_{trace_id}"
2501
+ print(f"[Async] 🔄 重试请求,新 trace_id: {retry_trace_id}")
2502
+
2503
+ # 更新 proxy_message 中的 trace_id
2504
+ if isinstance(proxy_message.get("content"), dict):
2505
+ proxy_message["content"]["trace_id"] = retry_trace_id
2506
+ proxy_message["trace_id"] = retry_trace_id
2507
+
2508
+ # 注册新的响应事件
2509
+ retry_response_event = asyncio.Event()
2510
+ async_session_manager._pending_requests[retry_trace_id] = retry_response_event
2511
+ # ✅ 注册重试请求的时间戳(用于 TTL 自动清理)
2512
+ async_session_manager.register_request_timestamp(retry_trace_id)
2513
+
2514
+ try:
2515
+ # 重新发送消息
2516
+ claude_agent_name = await asyncio.to_thread(configPresenter.get_claude_agent_name)
2517
+ print(f"[Async] 📤 重新发送消息到 Claude Agent: {claude_agent_name}")
2518
+
2519
+ await asyncio.to_thread(
2520
+ agentId.send_message, session_id, [claude_agent_name], proxy_message
2521
+ )
2522
+
2523
+ # 等待重试响应(60秒超时)
2524
+ print(f"[Async] ⏳ 等待重试响应...")
2525
+ try:
2526
+ await asyncio.wait_for(retry_response_event.wait(), timeout=300)
2527
+ print(f"[Async] ✅ 重试请求收到响应")
2528
+
2529
+ # 获取重试结果
2530
+ retry_result_data = async_session_manager._request_result_map.get(retry_trace_id)
2531
+ if retry_result_data:
2532
+ # 用重试结果替换原始结果,继续后续处理
2533
+ async_session_manager._request_result_map[trace_id] = retry_result_data
2534
+ perf_timings['wait_response'] = (time.time() - wait_start) * 1000
2535
+ perf_timings['auto_rebuild_retry'] = True
2536
+ # 不抛出异常,让代码继续执行后续的响应处理逻辑
2537
+ else:
2538
+ print(f"[Async] ❌ 重试请求未收到响应数据")
2539
+ raise HTTPException(
2540
+ status_code=503,
2541
+ detail="自动重试后仍未收到响应,请稍后重试"
2542
+ )
2543
+
2544
+ except asyncio.TimeoutError:
2545
+ print(f"[Async] ❌ 重试请求也超时了")
2546
+ raise HTTPException(
2547
+ status_code=504,
2548
+ detail="自动重建并重试后仍然超时,请检查网络连接或联系技术支持"
2549
+ )
2550
+
2551
+ finally:
2552
+ # 清理重试的 trace_id
2553
+ async_session_manager._pending_requests.pop(retry_trace_id, None)
2554
+ async_session_manager._request_result_map.pop(retry_trace_id, None)
2555
+ # ✅ 清理重试请求的时间戳
2556
+ async_session_manager.unregister_request_timestamp(retry_trace_id)
2557
+
2558
+ else:
2559
+ # 重建失败
2560
+ print(f"[Async] ❌ 强制重建失败: {rebuild_result['message']}")
2561
+ raise HTTPException(
2562
+ status_code=503,
2563
+ detail=f"连接自动重建失败: {rebuild_result['message']},请尝试重启 Evol"
2564
+ )
2565
+
2566
+ except HTTPException:
2567
+ # 重新抛出 HTTPException
2568
+ raise
2569
+ except Exception as rebuild_error:
2570
+ print(f"[Async] ❌ 自动重建过程异常: {rebuild_error}")
2571
+ import traceback
2572
+ traceback.print_exc()
2573
+ raise HTTPException(
2574
+ status_code=503,
2575
+ detail=f"连接自动重建异常: {str(rebuild_error)},请尝试重启 Evol"
2576
+ )
2577
+ else:
2578
+ # 未达到重建阈值,直接返回错误让用户重试
2579
+ print(f"[Async] ⚠️ 请求超时(连续 {no_response_count}/{_consecutive_no_response_threshold} 次),返回错误让用户重试")
2580
+ raise HTTPException(
2581
+ status_code=504,
2582
+ detail=f"请求超时,请重试(连续超时 {no_response_count}/{_consecutive_no_response_threshold} 次)"
2583
+ )
2584
+
2585
+ # ✅ 从result_map中获取结果(参考第253-254行)
2586
+ result_data = async_session_manager._request_result_map.get(trace_id)
2587
+ if not result_data:
2588
+ proxy_logger.error(f"未收到响应数据,trace_id: {trace_id}")
2589
+ raise HTTPException(status_code=503, detail="No response received")
2590
+
2591
+ result_type = result_data.get("result_type", "")
2592
+ response_msg = result_data.get("result", {})
2593
+
2594
+
2595
+ proxy_logger.info(f"收到响应,类型: {result_type}, trace_id: {trace_id}")
2596
+
2597
+ # 解析响应消息类型
2598
+ msg_type = result_type
2599
+ content = response_msg.get("content", {}) if isinstance(response_msg, dict) else response_msg
2600
+
2601
+ # 1. 处理错误响应
2602
+ if msg_type == "error":
2603
+ error_message = content if isinstance(content, str) else str(content)
2604
+ http_status = response_msg.get("http_status", 503)
2605
+ headers = response_msg.get("headers", {})
2606
+ proxy_logger.error(f"Claude代理返回错误: {error_message}, trace_id: {trace_id}")
2607
+ log_error(f"[Async] Claude代理返回错误: {error_message}, trace_id: {trace_id}")
2608
+ return Response(
2609
+ content = error_message,
2610
+ status_code = http_status,
2611
+ headers = headers,
2612
+ media_type = headers.get('content-type',"application/json")
2613
+ )
2614
+
2615
+ # 2. 处理流式响应
2616
+ if msg_type == "text/event-stream":
2617
+ stream_url = content if isinstance(content, str) else content.get("url", "")
2618
+ if stream_url:
2619
+ proxy_logger.info(f"Claude代理开始流式响应: {stream_url}, trace_id: {trace_id}")
2620
+ print(f"[Async] Claude代理开始流式响应: {stream_url}, trace_id: {trace_id}")
2621
+ return await handle_stream_response(stream_url)
2622
+ else:
2623
+ proxy_logger.error(f"流式响应缺少URL, trace_id: {trace_id}")
2624
+ log_error(f"[Async] 流式响应缺少URL, trace_id: {trace_id}")
2625
+ raise HTTPException(status_code=504, detail="Stream URL missing")
2626
+
2627
+ # 3. 处理普通成功响应
2628
+ proxy_logger.info("处理普通成功响应")
2629
+ if isinstance(content, dict):
2630
+ status_code = content.get("status_code", 200)
2631
+ response_headers = content.get("headers", {})
2632
+ response_body = content.get("body", "")
2633
+
2634
+ # ✅ 修复:移除会导致客户端解析失败的headers
2635
+ # 上游返回的body已经是解码后的JSON,不是gzip压缩的
2636
+ # 如果保留Content-Encoding: gzip,客户端会尝试解压导致失败
2637
+ headers_to_remove = ['Content-Encoding', 'Transfer-Encoding', 'Content-Length']
2638
+ for header in headers_to_remove:
2639
+ response_headers.pop(header, None)
2640
+ else:
2641
+ status_code = 200
2642
+ response_headers = {"Content-Type": "application/json"}
2643
+ response_body = json.dumps(content) if not isinstance(content, str) else content
2644
+
2645
+ proxy_logger.info(f"Claude代理响应完成: {status_code}, trace_id: {trace_id}")
2646
+ log_info(f"[Async] Claude代理响应完成: {status_code}, trace_id: {trace_id}")
2647
+
2648
+ # 确保response_body是可以编码的类型
2649
+ if isinstance(response_body, dict):
2650
+ # 如果response_body是dict,将其转换为JSON字符串
2651
+ response_body = json.dumps(response_body, ensure_ascii=False)
2652
+ elif isinstance(response_body, (list, tuple)):
2653
+ # 如果是列表或元组,也转换为JSON字符串
2654
+ response_body = json.dumps(response_body, ensure_ascii=False)
2655
+ elif not isinstance(response_body, (str, bytes)):
2656
+ # 如果是其他类型,转换为字符串
2657
+ response_body = str(response_body)
2658
+ real_response = response_body if isinstance(response_body, bytes) else response_body.encode('utf-8')
2659
+
2660
+ proxy_logger.info(f"准备返回响应给客户端,trace_id: {trace_id}")
2661
+ proxy_logger.info(f"状态码: {status_code}")
2662
+ proxy_logger.info(f"响应体大小: {len(real_response)} bytes")
2663
+
2664
+ # ✅ 详细诊断日志
2665
+ print(f"\n{'='*80}")
2666
+ print(f"[Async] 📤 准备返回响应给客户端")
2667
+ print(f" trace_id: {trace_id}")
2668
+ print(f" status_code: {status_code}")
2669
+ print(f" Content-Type: {response_headers.get('Content-Type', 'application/json')}")
2670
+ print(f" 响应体大小: {len(real_response)} bytes")
2671
+ print(f" 响应体前200字符: {real_response[:200]}")
2672
+
2673
+ # ✅ 性能监控:输出性能统计
2674
+ total_time = (time.time() - perf_start) * 1000
2675
+ perf_timings['total'] = total_time
2676
+
2677
+ proxy_logger.info("=" * 80)
2678
+ proxy_logger.info("Claude代理请求处理完成 - 性能统计")
2679
+ proxy_logger.info(f"总耗时: {total_time:.2f}ms")
2680
+ proxy_logger.info("各步骤耗时:")
2681
+ for step_name, duration in perf_timings.items():
2682
+ if step_name != 'total':
2683
+ percentage = (duration / total_time * 100) if total_time > 0 else 0
2684
+ proxy_logger.info(f" - {step_name}: {duration:.2f}ms ({percentage:.1f}%)")
2685
+ proxy_logger.info("=" * 80)
2686
+
2687
+ # 在控制台也输出性能统计(便于实时监控)
2688
+ print(f"[Perf] 总耗时: {total_time:.2f}ms | " +
2689
+ " | ".join([f"{k}: {v:.1f}ms" for k, v in perf_timings.items() if k != 'total']))
2690
+
2691
+ # 标记请求成功(用于积分确认)
2692
+ request_success = True
2693
+
2694
+ return Response(
2695
+ content = real_response,
2696
+ status_code = status_code,
2697
+ headers = response_headers,
2698
+ media_type=response_headers.get('Content-Type',"application/json")
2699
+ )
2700
+
2701
+ except asyncio.TimeoutError:
2702
+ proxy_logger.error(f"Claude代理请求超时: {trace_id}")
2703
+ log_error(f"[Async] Claude代理请求超时: {trace_id}")
2704
+ raise HTTPException(status_code=504, detail="Gateway Timeout")
2705
+ except HTTPException:
2706
+ raise
2707
+ except Exception as e:
2708
+ import traceback
2709
+
2710
+ # 详细的错误信息记录
2711
+ error_details = {
2712
+ "error_type": type(e).__name__,
2713
+ "error_message": str(e),
2714
+ "trace_id": trace_id,
2715
+ "response_body_type": type(locals().get('response_body', None)).__name__ if 'response_body' in locals() else 'unknown',
2716
+ "response_body_value": str(locals().get('response_body', 'N/A'))[:200] if 'response_body' in locals() else 'N/A'
2717
+ }
2718
+
2719
+ full_traceback = traceback.format_exc()
2720
+ proxy_logger.error(f"等待Claude代理响应失败: {str(e)}")
2721
+ proxy_logger.error(f"错误详情: {error_details}")
2722
+ proxy_logger.error(f"完整堆栈跟踪:\n{full_traceback}")
2723
+
2724
+ log_error(f"[Async] 等待Claude代理响应失败: {str(e)}")
2725
+ log_error(f"[Async] 错误详情: {error_details}")
2726
+ log_error(f"[Async] 完整堆栈跟踪: {full_traceback}")
2727
+
2728
+ # 控制台输出详细错误信息
2729
+ print("=" * 80)
2730
+ print(f"🚨 [Async] Claude代理响应处理失败:")
2731
+ print(f"📋 错误类型: {error_details['error_type']}")
2732
+ print(f"💬 错误消息: {error_details['error_message']}")
2733
+ print(f"🔍 追踪ID: {error_details['trace_id']}")
2734
+ print(f"📦 响应体类型: {error_details['response_body_type']}")
2735
+ print(f"📄 响应体内容: {error_details['response_body_value']}")
2736
+ print("📚 完整堆栈跟踪:")
2737
+ print(full_traceback)
2738
+ print("=" * 80)
2739
+
2740
+ raise HTTPException(status_code=500, detail=f"Proxy Error: {str(e)}")
2741
+ finally:
2742
+ # ✅ P2修复:延迟清理trace_id,避免竞态条件
2743
+ # 场景:如果AgentCP的响应消息正在处理中,过早清理会导致响应丢失
2744
+ # 等待100ms确保消息处理器有足够时间存储结果
2745
+ try:
2746
+ await asyncio.sleep(0.1)
2747
+ except asyncio.CancelledError:
2748
+ # 用户断开连接导致协程取消,继续清理
2749
+ pass
2750
+ async_session_manager._pending_requests.pop(trace_id, None)
2751
+ async_session_manager._request_result_map.pop(trace_id, None)
2752
+ # ✅ 清理时间戳记录
2753
+ async_session_manager.unregister_request_timestamp(trace_id)
2754
+
2755
+ else:
2756
+ # 循环重试后仍然失败
2757
+ proxy_logger.error("获取或创建Claude代理会话失败(已重试)")
2758
+ log_error("[Async] 获取或创建Claude代理会话失败(已重试)")
2759
+ raise HTTPException(status_code=503, detail="Service Unavailable: Cannot get or create session")
2760
+ else:
2761
+ # 兜底逻辑:理论上经过前面的重连逻辑后不会进入这里
2762
+ proxy_logger.error("AgentID 未连接(兜底)")
2763
+ log_error("[Async] AgentID 未连接(兜底),请尝试重新启动 Evol")
2764
+ raise HTTPException(status_code=503, detail="Service Unavailable: 连接失败,请尝试重新启动 Evol")
2765
+
2766
+ except HTTPException as e:
2767
+ # ✅ 直接重新抛出 HTTPException,保留原始错误信息
2768
+ proxy_logger.error(f"HTTPException: {e.detail}")
2769
+ raise
2770
+ except Exception as e:
2771
+ import traceback
2772
+ full_traceback = traceback.format_exc()
2773
+
2774
+ proxy_logger.error(f"Claude代理请求处理失败: {str(e)}")
2775
+ proxy_logger.error(f"异常堆栈:\n{full_traceback}")
2776
+
2777
+ traceback.print_exc()
2778
+ print(f"[Async] Claude代理请求处理失败: {str(e)}")
2779
+ raise HTTPException(status_code=500, detail=f"Proxy Error: {str(e)}")
2780
+ finally:
2781
+ # 处理积分预扣除的释放或确认
2782
+ if reserved_credits > 0 and user_id:
2783
+ try:
2784
+
2785
+
2786
+ if request_success:
2787
+ # 请求成功,确认积分使用
2788
+ await credits_lock_manager.confirm_credits_usage(
2789
+ user_id=user_id,
2790
+ amount=reserved_credits,
2791
+ request_id=request_id
2792
+ )
2793
+ proxy_logger.info(f"✅ 积分使用已确认: {reserved_credits}")
2794
+ print(f"[Async] ✅ 积分使用已确认: {reserved_credits}")
2795
+ else:
2796
+ # 请求失败,释放预扣除的积分
2797
+ await credits_lock_manager.release_reserved_credits(
2798
+ user_id=user_id,
2799
+ amount=reserved_credits,
2800
+ request_id=request_id
2801
+ )
2802
+ proxy_logger.info(f"🔄 积分预扣除已释放: {reserved_credits}")
2803
+ print(f"[Async] 🔄 积分预扣除已释放: {reserved_credits}")
2804
+
2805
+ except Exception as e:
2806
+ proxy_logger.error(f"❌ 积分处理失败: {str(e)}")
2807
+ print(f"[Async] ❌ 积分处理失败: {str(e)}")
2808
+
2809
+ # 注意:不再需要关闭 file_handler,因为使用了重用的全局 logger
2810
+
2811
+
2812
def _sse_error_event(message: str) -> bytes:
    """Return one SSE ``error`` event whose data is ``{"error": message}``.

    The payload is serialized with ``json.dumps`` so that quotes, backslashes
    and newlines inside *message* can neither produce invalid JSON nor break
    the SSE framing (an event is terminated by a blank line).
    """
    payload = json.dumps({"error": message}, ensure_ascii=False)
    return f"event: error\ndata: {payload}\n\n".encode("utf-8")


async def handle_stream_response(stream_url: str):
    """Relay the upstream agent's SSE stream to the client as standard SSE.

    The upstream stream is URL-encoded and wraps every SSE field in an extra
    ``data:`` prefix::

        data: event: message_start
        data: data: {...}

    Each line is URL-decoded and every event/data pair is re-emitted as one
    standard SSE event::

        event: message_start
        data: {...}

    An upstream ``event: done`` line ends the relay without emitting anything.
    Lines that have no ``:``, ``data:`` lines whose value has no inner
    ``event:``/``data:`` field, and lines with any other key are dropped.

    Args:
        stream_url: Upstream stream URL; ``agent_id`` is appended to it as a
            query parameter.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream``.
        Failures that happen while streaming are reported in-band as
        ``event: error`` events.

    Raises:
        HTTPException: 500 if preparing the response fails (for example when
            the module-level ``agentId`` has no ``id``). The 504/502 handlers
            are kept for compatibility, although no httpx call is made
            outside the generator.
    """
    try:
        # Append agent_id with the correct separator: "?" when the URL has no
        # query string yet, "&" otherwise.
        separator = "&" if "?" in stream_url else "?"
        url = f"{stream_url}{separator}agent_id={agentId.id}"
        print(f"[Async] 流式响应开始: {url}")

        async def stream_generator():
            """Yield standard SSE events (as bytes) converted from upstream."""
            try:
                current_event = None  # event name waiting for its data line
                stream_ended_normally = False  # True once "event: done" is seen

                # NOTE(review): verify=False disables TLS certificate
                # verification for the upstream request - confirm this is
                # intended before relying on it outside trusted networks.
                async with httpx.AsyncClient(verify=False, timeout=httpx.Timeout(30.0, read=300.0)) as client:
                    async with client.stream("GET", url) as response:
                        if response.status_code != 200:
                            log_error(f"[Async] 流式响应错误状态码: {response.status_code}")
                            yield _sse_error_event(f"Stream error: HTTP {response.status_code}")
                            return

                        # Upstream lines come in pairs (event line, then data
                        # line); the event name is cached until its data
                        # arrives so the pair is emitted as one SSE event.
                        async for line in response.aiter_lines():
                            if not line:
                                continue
                            try:
                                # Step 1: URL-decode the raw line.
                                decoded_line = urllib.parse.unquote_plus(line)
                                if ":" not in decoded_line:
                                    continue

                                # Step 2: split "key: value".
                                key, _, value = decoded_line.partition(":")
                                key = key.strip()
                                value = value.strip()

                                if key == "event":
                                    if value == "done":
                                        # End of stream: emit nothing. Per the
                                        # original note, gemini-cli detects the
                                        # end by the connection closing.
                                        stream_ended_normally = True
                                        print("[Async] 🔚 收到 event:done,直接结束流(不发送结束标记)")
                                        break
                                    # Stand-alone event with no paired data.
                                    yield f"event: {value}\n\n".encode("utf-8")
                                    continue

                                if key != "data" or ":" not in value:
                                    continue

                                # Step 3: unwrap "data: event: x" / "data: data: {...}".
                                inner_key, _, inner_value = value.partition(":")
                                inner_key = inner_key.strip()
                                inner_value = inner_value.strip()

                                if inner_key == "event":
                                    current_event = inner_value
                                elif inner_key == "data":
                                    if current_event:
                                        output = f"event: {current_event}\ndata: {inner_value}\n\n"
                                        yield output.encode("utf-8")
                                        current_event = None
                                    else:
                                        # Data without a preceding event line:
                                        # forward it as a bare data event.
                                        output = f"data: {inner_value}\n\n"

                                        # Diagnostics only: the final chunk
                                        # carries "finishReason"; check that it
                                        # is complete JSON.
                                        if "finishReason" in inner_value:
                                            print("[Async] 🎯 检测到最后一条数据(包含 finishReason)")
                                            print(f"[Async] 数据长度: {len(inner_value)} 字符")
                                            try:
                                                json.loads(inner_value)
                                                print("[Async] ✓ JSON 格式验证通过")
                                            except json.JSONDecodeError as json_error:
                                                print(f"[Async] ✗ JSON 格式无效: {json_error}")

                                        yield output.encode("utf-8")

                            except Exception as e:
                                log_error(f"[Async] 处理SSE行失败: {str(e)}, 原始行: {line[:100]}")
                                # Tell the client instead of silently skipping.
                                yield _sse_error_event(f"SSE parse error: {str(e)[:100]}")
                                continue

                        # No end marker is sent; the response simply finishes.
                        print("[Async] 📍 流式响应 for 循环结束")
                        print(f"[Async] stream_ended_normally = {stream_ended_normally}")

                        if not stream_ended_normally:
                            print("[Async] 🔚 流式响应自然结束(无 event:done),直接关闭连接")
                            log_info("[Async] 流式响应自然结束,无需发送结束标记")
                        else:
                            print("[Async] ℹ️ 流式响应已通过 event:done 结束")

            except httpx.TimeoutException as e:
                log_error(f"[Async] 流式响应超时: {str(e)}")
                yield _sse_error_event("Stream timeout")
            except httpx.RequestError as e:
                log_error(f"[Async] 流式请求错误: {str(e)}")
                yield _sse_error_event(f"Stream request error: {str(e)}")
            except Exception as e:
                log_error(f"[Async] 流式生成器异常: {str(e)}")
                import traceback
                log_error(f"[Async] 异常堆栈: {traceback.format_exc()}")
                yield _sse_error_event(f"Stream processing error: {str(e)}")

        return StreamingResponse(
            stream_generator(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no"
            }
        )

    except httpx.TimeoutException:
        log_error(f"[Async] 流式请求超时: {stream_url}")
        raise HTTPException(status_code=504, detail="Stream timeout")
    except httpx.RequestError as e:
        log_error(f"[Async] 流式请求失败: {str(e)}")
        raise HTTPException(status_code=502, detail=f"Stream error: {str(e)}")
    except Exception as e:
        log_error(f"[Async] 处理流式响应异常: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Stream processing error: {str(e)}")
2980
+
2981
+
2982
async def handle_slow_request_as_stream(
    response_event: asyncio.Event,
    trace_id: str,
    session_manager
):
    """Wrap a slow non-streaming request in an SSE streaming response.

    The returned stream:

    1. starts with a ``processing`` event,
    2. emits a ``heartbeat`` event every 5 seconds while ``response_event``
       is unset, for at most 190 seconds,
    3. then reads ``session_manager._request_result_map[trace_id]`` and emits
       either an ``error`` event, or a ``message`` event carrying the
       response body followed by a ``done`` event.

    NOTE(review): the timeout message reports 240 seconds; presumably the
    caller has already waited about 50 seconds before delegating here -
    confirm against the caller.

    Args:
        response_event: Event that is set once the upstream result is stored.
        trace_id: Key of the request in ``session_manager._request_result_map``.
        session_manager: Object exposing ``_request_result_map``.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream``.
    """
    heartbeat_period = 5  # seconds between heartbeat events
    wait_budget = 190  # maximum seconds spent waiting inside the generator

    def sse_frame(event_name, data_text):
        # One complete SSE event, encoded for the response body.
        return f"event: {event_name}\ndata: {data_text}\n\n".encode('utf-8')

    async def slow_response_generator():
        """Yield the processing/heartbeat events and then the final result."""
        try:
            # Initial event so the client sees a response immediately.
            yield sse_frame(
                "processing",
                json.dumps({"type": "processing", "message": "Request is being processed..."}),
            )

            # Wait in heartbeat-sized slices; `waited` is the total number of
            # seconds elapsed after each slice that timed out.
            for waited in range(heartbeat_period, wait_budget + heartbeat_period, heartbeat_period):
                try:
                    await asyncio.wait_for(response_event.wait(), timeout=heartbeat_period)
                    break  # the response has arrived
                except asyncio.TimeoutError:
                    yield sse_frame("heartbeat", json.dumps({
                        "type": "heartbeat",
                        "message": f"Still processing... ({waited}s elapsed)",
                        "elapsed_seconds": waited
                    }))

            stored = session_manager._request_result_map.get(trace_id)
            if not stored:
                # Nothing was stored for this trace_id: report a timeout.
                yield sse_frame("error", json.dumps({
                    "type": "error",
                    "error": {
                        "type": "timeout_error",
                        "message": "Request timeout: No response from upstream after 240 seconds"
                    }
                }))
                return

            kind = stored.get("result_type", "")
            upstream_msg = stored.get("result", {})
            if isinstance(upstream_msg, dict):
                content = upstream_msg.get("content", {})
            else:
                content = upstream_msg

            if kind == "error":
                if isinstance(content, str):
                    error_message = content
                else:
                    error_message = str(content)
                yield sse_frame("error", json.dumps({
                    "type": "error",
                    "error": {
                        "type": "api_error",
                        "message": error_message
                    }
                }))
                return

            # Success: send the whole body as one event, then the done marker.
            body = content.get("body", {}) if isinstance(content, dict) else content
            yield sse_frame("message", json.dumps(body, ensure_ascii=False))
            yield sse_frame("done", json.dumps({'type': 'done'}))

            log_info(f"[Async] 慢速请求流式响应完成,trace_id: {trace_id}")

        except Exception as e:
            log_error(f"[Async] 慢速请求流生成器异常: {str(e)}")
            import traceback
            log_error(f"[Async] 异常堆栈: {traceback.format_exc()}")
            yield sse_frame("error", json.dumps({
                "type": "error",
                "error": {
                    "type": "internal_error",
                    "message": f"Stream processing error: {str(e)}"
                }
            }))

    return StreamingResponse(
        slow_response_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
            "X-Slow-Request-Wrapper": "true"  # marks this as a wrapped streaming response
        }
    )
3095
+
3096
+
3097
async def get_session_status():
    """Report the async session manager's current state as JSON.

    Returns:
        JSONResponse: status 200 whose content is whatever
        ``async_session_manager.get_session_info()`` returns.

    Raises:
        HTTPException: status 500 if gathering the info or building the
        response raises; the original error text is included in ``detail``.
    """
    try:
        return JSONResponse(
            status_code=200,
            content=async_session_manager.get_session_info(),
        )
    except Exception as exc:
        reason = str(exc)
        log_error(f"[Async] 获取session状态失败: {reason}")
        raise HTTPException(status_code=500, detail=f"Session status error: {reason}")
3105
+
3106
+
3107
def set_agent_id(aid: AgentID) -> None:
    """Install ``aid`` as the module-level AgentID and drop state tied to the old one.

    If the id of the incoming instance differs from the id of the current
    global ``agentId`` and ``async_session_manager`` exists, every session,
    handler-registration, pending-request and result map on the manager is
    cleared. The global ``agentId`` is then replaced, mirrored onto
    ``userPresenter`` (only when ``aid`` is not ``None``) and a disconnect
    callback is registered.

    Args:
        aid: The AgentID instance to install. The id comparison and the final
            log line both tolerate ``None``.
    """
    # NOTE(review): this view of the file carries no indentation; the nesting
    # below is reconstructed from syntax and the original comments. In
    # particular, confirm that _register_disconnect_callback(aid) really sits
    # at function level (and therefore may receive None).
    global agentId
    global async_session_manager

    old_agent_id = agentId.id if agentId else None
    new_agent_id = aid.id if aid else None

    print(f"[set_agent_id] AgentID 变更:{old_agent_id} -> {new_agent_id}")

    # Key fix: when the AgentID changes, clear all sessions and handlers.
    # Per the original author's note, the old handlers were registered on the
    # old AgentID and cannot receive messages for the new one.
    if old_agent_id != new_agent_id and async_session_manager is not None:
        print(f"[set_agent_id] ⚠️ AgentID 已改变,清空所有 sessions 和 handlers")
        print(f"[set_agent_id] 清理前 - sessions: {len(async_session_manager._sessions)}, handlers: {len(async_session_manager._handler_registered)}")

        # Empty every bookkeeping map on the manager.
        async_session_manager._sessions.clear()
        async_session_manager._session_info.clear()
        async_session_manager._handler_registered.clear()
        async_session_manager._pending_requests.clear()
        async_session_manager._request_result_map.clear()
        # Also clear the timestamp records; guarded because the attribute may
        # not exist on the manager.
        if hasattr(async_session_manager, '_request_timestamps'):
            async_session_manager._request_timestamps.clear()

        print(f"[set_agent_id] ✅ 已清空所有 sessions 和 handlers")

    # Fix: update the global agentId whether or not anything was cleared.
    agentId = aid

    # Key fix: mirror the AgentID onto userPresenter so both places agree.
    if aid is not None:
        userPresenter.agentId = aid
        print(f"[set_agent_id] ✅ 已同步 AgentID 到 userPresenter")

    # Register the disconnect callback (original note: enables automatic rebuild).
    _register_disconnect_callback(aid)

    log_info(f"[Async] Claude Proxy异步版本已初始化,AgentID: {aid.id if aid else 'None'}")
3147
+
3148
+
3149
async def init_agent_id_on_startup() -> None:
    """
    Try to load the AgentID bound to the current user when the server starts.

    Important (from the original author): only the user-bound AID returned by
    the server may be used; a local AID must never be loaded arbitrarily.

    Flow:
        1. Return immediately if the global ``agentId`` is already set.
        2. Return immediately (no retry) if the user is not logged in.
        3. Call ``userPresenter.user_agent_login()`` to obtain the
           server-bound AID.
        4. If the result signals an expired token or a required re-login,
           return without retrying.
        5. Any other failure, or any exception, is retried: 3 attempts in
           total with a 2-second pause between attempts.
    """
    # NOTE(review): this view of the file carries no indentation; nesting is
    # reconstructed from syntax. Confirm that the `return` after the inner
    # `if userPresenter.agentId is not None` belongs to the success branch.
    global agentId, _agentcp_instance

    # Guard against repeated initialisation.
    if agentId is not None:
        print(f"[Startup] AgentID 已存在,跳过重复初始化")
        print(f"[Startup] 现有 AgentID: {agentId.id}")
        return

    print(f"[Startup] 开始初始化 AgentID...")

    # Check the login state once; this check is not retried.
    if not userPresenter.is_logged_in():
        print(f"[Startup] 用户未登录,跳过 AgentID 初始化(用户登录后会自动触发)")
        return

    MAX_RETRIES = 3
    RETRY_INTERVAL = 2  # seconds

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            print(f"[Startup] 用户已登录,调用 user_agent_login 获取服务器绑定的AID... (第 {attempt}/{MAX_RETRIES} 次)")

            login_result = await userPresenter.user_agent_login()

            if login_result.get("status") == "success":
                # Login succeeded; presumably user_agent_login has already
                # stored the AgentID on userPresenter (per the original note).
                aid_id = login_result.get("aid")
                print(f"[Startup] ✅ 成功加载服务器绑定的 AgentID: {aid_id}")

                # Make sure the module-level agentId mirrors userPresenter's.
                if userPresenter.agentId is not None:
                    agentId = userPresenter.agentId
                    _agentcp_instance = userPresenter._get_agentcp_instance()

                    # Register the disconnect callback.
                    _register_disconnect_callback(agentId)
                    print(f"[Startup] ✅ AgentID 已同步到全局变量")
                return
            else:
                # Login failed: decide whether it is a token-expiry /
                # re-login situation or something worth retrying.
                error_msg = login_result.get("error", "未知错误")
                need_relogin = login_result.get("need_relogin", False)

                if need_relogin or "token" in error_msg.lower() or "过期" in error_msg or "登录" in error_msg:
                    # Token expired or re-login required: do not retry.
                    print(f"[Startup] Token 过期或需要重新登录,跳过 AgentID 初始化: {error_msg}")
                    return

                # Any other error (e.g. a network problem): retry.
                print(f"[Startup] ❌ user_agent_login 失败: {error_msg}")
                if attempt < MAX_RETRIES:
                    print(f"[Startup] ⏳ 等待 {RETRY_INTERVAL} 秒后重试...")
                    await asyncio.sleep(RETRY_INTERVAL)

        except Exception as e:
            # Exceptions are logged with their traceback and then retried
            # like any other non-fatal failure.
            log_error(f"[Startup] 第 {attempt} 次尝试异常: {str(e)}")
            import traceback
            log_error(traceback.format_exc())
            if attempt < MAX_RETRIES:
                print(f"[Startup] ⏳ 等待 {RETRY_INTERVAL} 秒后重试...")
                await asyncio.sleep(RETRY_INTERVAL)

    # Every attempt failed.
    print(f"[Startup] ❌ 已重试 {MAX_RETRIES} 次,AgentID 初始化失败")
    log_error(f"[Startup] AgentID 初始化失败:已重试 {MAX_RETRIES} 次")
3225
+
3226
+
3227
async def reload_agent_id() -> dict:
    """
    Fetch the service-side AID again and reload the module-level AgentID.

    Intended uses (from the original author):
        1. Manually trigger a reload when a health check fails.
        2. Refresh the AgentID connection on demand.

    Flow:
        1. Return a failure result if the user is not logged in.
        2. Call ``userPresenter.user_agent_login()``; return a failure result
           if its status is not ``"success"``.
        3. Copy ``userPresenter.agentId`` into the global ``agentId`` and
           refresh ``_agentcp_instance``.
        4. Clear the AsyncSessionManager cache.
        5. Register the disconnect callback with
           ``disable_auto_reconnect=True``.
        6. Reset the handler-registered flag on the ``evol_health_check``
           checker (best effort; failures are only printed).

    Returns:
        dict: {
            "success": bool,
            "message": str,
            "agent_id": str or None,
            "is_online": bool
        }
        No exception escapes: any error is reported through a failure dict.
    """
    # NOTE(review): this view of the file carries no indentation; nesting is
    # reconstructed from syntax (try / if / else / except).
    global agentId, _agentcp_instance

    print("\n" + "=" * 80)
    print("[ReloadAgentID] 🔄 开始重新加载 AgentID...")
    print("=" * 80)
    log_info("[ReloadAgentID] 开始重新加载 AgentID")

    try:
        # 1. The user must be logged in.
        if not userPresenter.is_logged_in():
            print("[ReloadAgentID] ❌ 用户未登录")
            return {
                "success": False,
                "message": "用户未登录,无法重新加载 AgentID",
                "agent_id": None,
                "is_online": False
            }

        # 2. Remember the previous AgentID id (if any) for logging and for
        #    the failure result below.
        old_agent_id = agentId.id if agentId else None
        print(f"[ReloadAgentID] 当前 AgentID: {old_agent_id}")

        # 3. Ask user_agent_login for the server-bound AID again.
        print("[ReloadAgentID] 📡 调用 user_agent_login 获取服务端 AID...")
        login_result = await userPresenter.user_agent_login()

        if login_result.get("status") != "success":
            error_msg = login_result.get("error", "未知错误")
            print(f"[ReloadAgentID] ❌ user_agent_login 失败: {error_msg}")
            # The previous AgentID is left in place and reported back.
            return {
                "success": False,
                "message": f"获取服务端 AID 失败: {error_msg}",
                "agent_id": old_agent_id,
                "is_online": agentId.is_online_success if agentId else False
            }

        # 4. Sync the module-level agentId from userPresenter.
        if userPresenter.agentId is not None:
            new_agent_id = userPresenter.agentId
            agentId = new_agent_id
            _agentcp_instance = userPresenter._get_agentcp_instance()

            print(f"[ReloadAgentID] ✅ AgentID 已更新: {old_agent_id} -> {new_agent_id.id}")

            # 5. Clear the AsyncSessionManager cache. This runs on every
            #    successful reload; the code does not compare old and new ids.
            _clear_async_session_manager_cache()
            print("[ReloadAgentID] ✅ 已清空 AsyncSessionManager 缓存")

            # 6. Register the disconnect callback.
            _register_disconnect_callback(new_agent_id, disable_auto_reconnect=True)
            print("[ReloadAgentID] ✅ 已注册断开回调")

            # 7. Sync with the evol_health_check module (non-fatal on failure).
            try:
                from . import evol_health_check
                health_checker = evol_health_check.get_health_checker()
                # Reset the flag so the handler is registered again on the
                # next health check (per the original note).
                health_checker._handler_registered = False
                print("[ReloadAgentID] ✅ 已同步到 evol_health_check 模块")
            except Exception as e:
                print(f"[ReloadAgentID] ⚠️ 同步到 evol_health_check 失败(非致命): {e}")

            print("=" * 80)
            print(f"[ReloadAgentID] 🎉 AgentID 重新加载成功!")
            print(f"[ReloadAgentID] - AgentID: {new_agent_id.id}")
            print(f"[ReloadAgentID] - 在线状态: {new_agent_id.is_online_success}")
            print("=" * 80 + "\n")
            log_info(f"[ReloadAgentID] 重新加载成功: {new_agent_id.id}")

            return {
                "success": True,
                "message": "AgentID 重新加载成功",
                "agent_id": new_agent_id.id,
                "is_online": new_agent_id.is_online_success
            }
        else:
            # Login reported success but userPresenter holds no AgentID.
            print("[ReloadAgentID] ❌ user_agent_login 成功但 agentId 为空")
            return {
                "success": False,
                "message": "获取 AgentID 成功但实例为空",
                "agent_id": None,
                "is_online": False
            }

    except Exception as e:
        # Boundary handler: report the failure to the caller instead of raising.
        import traceback
        error_traceback = traceback.format_exc()
        print(f"[ReloadAgentID] ❌ 重新加载异常: {e}")
        print(f"[ReloadAgentID] 异常堆栈:\n{error_traceback}")
        log_error(f"[ReloadAgentID] 重新加载异常: {e}")

        return {
            "success": False,
            "message": f"重新加载异常: {str(e)}",
            "agent_id": agentId.id if agentId else None,
            "is_online": agentId.is_online_success if agentId else False
        }
3346
+
3347
+
3348
def get_current_agent_id() -> AgentID:
    """Expose the module-level AgentID to other modules (e.g. evol_health_check).

    Returns:
        The object currently stored in the ``agentId`` global; this is
        ``None`` while no AgentID has been installed.
    """
    # A plain read of a module global needs no ``global`` declaration.
    return agentId
3359
+
3360
+
3361
+ # ==================== OpenClaw Proxy 支持函数 ====================
3362
+
3363
def verify_api_key(api_key: str) -> Optional[dict]:
    """Map an API key to user information, or ``None`` if it is rejected.

    Two key shapes are recognised:
        * ``evol-`` + 32 characters (legacy, test-only). Only the total
          length of 37 is checked.
        * ``sk_live_`` + payload (standard). The payload is not inspected yet.

    WARNING: both branches currently return fixed placeholder identities; no
    real credential verification happens here (see the TODO below).

    Args:
        api_key: The key presented by the caller.

    Returns:
        ``{"user_id": str, "username": str}`` for an accepted key, otherwise
        ``None``. Any exception raised while inspecting the key is logged and
        also yields ``None``.
    """
    legacy_prefix = "evol-"
    live_prefix = "sk_live_"
    legacy_length = 37  # len("evol-") + 32

    try:
        # Empty or missing key: reject straight away.
        if not api_key:
            return None

        if api_key.startswith(legacy_prefix):
            # Legacy keys get a length check only, then a mock test identity.
            if len(api_key) == legacy_length:
                return {
                    "user_id": "test_user",
                    "username": "Test User"
                }
            return None

        if api_key.startswith(live_prefix):
            # TODO: verify the key for real (the original note mentions
            # apikeyPresenter); placeholder data is returned for now.
            return {
                "user_id": "api_user",
                "username": "API User"
            }

        # Any other shape is unsupported.
        return None

    except Exception as exc:
        log_error(f"[verify_api_key] 验证失败: {exc}")
        return None
3408
+
3409
+
3410
def check_and_deduct_credits(user_id: str) -> bool:
    """Check the user's credit balance and deduct the cost of one request.

    Currently a stub: no balance is read and nothing is deducted, so every
    request is allowed.

    Args:
        user_id: Identifier of the user being charged (unused for now).

    Returns:
        ``True`` when credits were sufficient and deducted, ``False`` when
        they were not (or when the check itself failed).
    """
    try:
        # TODO: implement the real credit check and deduction.
        # Until then every request is allowed.
        allowed = True
    except Exception as exc:
        log_error(f"[check_and_deduct_credits] 检查积分失败: {exc}")
        allowed = False
    return allowed
3428
+
3429
+
3430
+ # 以下为模块其他代码