@agentunion/kite 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (573)
  1. package/.claude/skills/kite/checklists/feature-checklist.md +496 -0
  2. package/.claude/skills/kite/references/event-patterns.md +180 -0
  3. package/.claude/skills/kite/references/health-check.md +202 -0
  4. package/.claude/skills/kite/references/http-service.md +199 -0
  5. package/.claude/skills/kite/references/module-md-spec.md +172 -0
  6. package/.claude/skills/kite/references/multi-connection.md +147 -0
  7. package/.claude/skills/kite/references/rpc-patterns.md +199 -0
  8. package/.claude/skills/kite/references/shutdown-sequence.md +146 -0
  9. package/.claude/skills/kite/references/stdin-protocol.md +147 -0
  10. package/.claude/skills/kite/references/test-center-integration.md +178 -0
  11. package/.claude/skills/kite/references/ws-lifecycle.md +301 -0
  12. package/.claude/skills/kite/skill.md +272 -0
  13. package/.claude/skills/kite/templates/go/README.md +20 -0
  14. package/.claude/skills/kite/templates/node/entry.js +134 -0
  15. package/.claude/skills/kite/templates/node/module.md +16 -0
  16. package/.claude/skills/kite/templates/node/server.js +351 -0
  17. package/.claude/skills/kite/templates/node/server_http.js +90 -0
  18. package/.claude/skills/kite/templates/python/entry.py +425 -0
  19. package/.claude/skills/kite/templates/python/module.md +26 -0
  20. package/.claude/skills/kite/templates/python/server.py +447 -0
  21. package/.claude/skills/kite/templates/python/server_http.py +433 -0
  22. package/cli.js +38 -4
  23. package/core/env_checker.py +96 -0
  24. package/docs/05-短信认证与用户信息接口文档.md +507 -0
  25. package/docs/ACP/345/215/217/350/256/256/345/205/274/345/256/271/346/226/271/346/241/210.md +138 -0
  26. package/docs/CI/344/270/216AI/350/207/252/345/212/250/345/214/226/346/265/213/350/257/225/346/226/271/346/241/210.md +75 -0
  27. package/docs/CLI/345/274/200/345/217/221/350/256/241/345/210/222.md +595 -0
  28. package/docs/ClaudeCode/350/277/234/347/250/213/345/215/217/344/275/234/347/263/273/347/273/237-/346/212/200/346/234/257/350/257/204/344/274/260.md +535 -0
  29. package/docs/ClaudeCode/350/277/234/347/250/213/345/215/217/344/275/234/347/263/273/347/273/237/350/256/276/350/256/241.md +631 -0
  30. package/docs/Evol-App/344/275/277/347/224/250KernelClient/346/224/271/351/200/240/345/256/214/346/210/220.md +342 -0
  31. package/docs/Evol/346/216/247/345/210/266/345/217/260/346/217/222/344/273/266/345/214/226/346/236/266/346/236/204/346/246/202/350/246/201.md +604 -0
  32. package/docs/Evol/346/216/247/345/210/266/345/217/260/346/217/222/344/273/266/345/214/226/346/236/266/346/236/204/350/256/276/350/256/241.md +1708 -0
  33. package/docs/Evol/346/250/241/345/235/227/350/256/276/350/256/241/346/226/271/346/241/210.md +1154 -0
  34. package/docs/Evol/351/241/265/351/235/242/346/217/222/344/273/266/345/214/226-Evol/346/250/241/345/235/227/345/256/236/346/226/275/346/214/207/345/215/227.md +403 -0
  35. package/docs/Evol/351/241/265/351/235/242/346/217/222/344/273/266/345/214/226-/345/244/226/351/203/250/346/250/241/345/235/227/346/216/245/345/205/245/346/214/207/345/215/227.md +468 -0
  36. package/docs/HTTP-RPC/350/277/201/347/247/273/345/210/260WebSocket/350/256/241/345/210/222.md +318 -0
  37. package/docs/INDEX.md +388 -0
  38. package/docs/KITE_DOCS_GUIDE.md +33 -0
  39. package/docs/Kernel-Client-Kite-Token/346/224/257/346/214/201/345/256/236/346/226/275/345/256/214/346/210/220.md +330 -0
  40. package/docs/Kernel/344/270/273/345/212/250Ping/346/234/272/345/210/266-/346/255/243/347/241/256/345/256/236/347/216/260.md +235 -0
  41. package/docs/Kernel/344/270/273/345/212/250Ping/346/234/272/345/210/266/345/256/236/346/226/275/346/200/273/347/273/223.md +204 -0
  42. package/docs/Kite/345/256/211/350/243/205/351/227/256/351/242/230/350/247/243/345/206/263/346/226/271/346/241/210.md +362 -0
  43. package/docs/Kite/346/216/247/345/210/266/345/217/260/346/217/222/344/273/266/345/214/226/346/236/266/346/236/204/350/256/276/350/256/241-/347/273/210/346/236/201/347/233/256/346/240/207.md +721 -0
  44. package/docs/Kite/346/216/247/345/210/266/345/217/260/347/273/237/344/270/200WebSocket/346/224/271/351/200/240/346/226/271/346/241/210.md +821 -0
  45. package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/01-/346/241/206/346/236/266/345/256/232/344/275/215.md +12 -0
  46. package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/02-/346/240/270/345/277/203/346/246/202/345/277/265.md +341 -0
  47. package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/03-/347/263/273/347/273/237/346/236/266/346/236/204.md +257 -0
  48. package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/04-/346/250/241/345/235/227/350/247/204/350/214/203.md +263 -0
  49. package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/05-/346/240/270/345/277/203/346/265/201/347/250/213-/346/226/260/347/211/210.md +267 -0
  50. package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/05-/346/240/270/345/277/203/346/265/201/347/250/213.md +149 -0
  51. package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/06-/347/233/256/345/275/225/347/273/223/346/236/204.md +231 -0
  52. package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/07-/346/225/260/346/215/256/346/250/241/345/236/213.md +68 -0
  53. package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/08-/346/211/251/345/261/225/346/200/247.md +34 -0
  54. package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/09-/344/270/216/345/205/267/344/275/223/345/272/224/347/224/250/347/232/204/345/205/263/347/263/273.md +22 -0
  55. package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/README.md +46 -0
  56. package/docs/Kite/347/263/273/347/273/237/345/220/257/345/212/250/346/265/201/347/250/213.md +567 -0
  57. package/docs/Launcher/345/220/257/345/212/250/345/231/250/346/226/207/346/241/243.md +745 -0
  58. package/docs/Polyglot/350/277/220/350/241/214/346/227/266/344/270/216Clawdbot/345/205/274/345/256/271/346/200/247/350/256/276/350/256/241.md +321 -0
  59. package/docs/Redis/344/270/216/346/250/241/345/235/227/345/244/232/345/256/236/344/276/213/346/226/271/346/241/210.md +438 -0
  60. package/docs/Relay-Kite-Token/350/256/244/350/257/201/345/256/236/346/226/275/345/256/214/346/210/220.md +178 -0
  61. package/docs/Relay-Token/346/235/203/351/231/220/351/205/215/347/275/256/351/252/214/350/257/201.md +113 -0
  62. package/docs/Watchdog/345/201/245/345/272/267/346/243/200/346/237/245/344/270/216WebSocket-Ping/346/234/272/345/210/266/345/210/206/346/236/220.md +367 -0
  63. package/docs/Watchdog/350/265/204/346/272/220/347/233/221/346/216/247/347/255/226/347/225/245.md +92 -0
  64. package/docs/WebSocket/346/216/245/346/224/266/345/276/252/347/216/257/346/255/273/351/224/201/351/230/262/350/214/203/350/247/204/350/214/203.md +357 -0
  65. package/docs/WebSocket/350/277/236/346/216/245/351/237/247/346/200/247/344/270/216/351/207/215/350/277/236/346/234/272/345/210/266/345/256/214/346/225/264/346/226/271/346/241/210.md +531 -0
  66. package/docs/WebSocket/350/277/236/346/216/245/351/237/247/346/200/247/346/226/271/346/241/210.md +169 -0
  67. package/docs/WebSocket/351/207/215/350/277/236/346/234/272/345/210/266/346/265/213/350/257/225/346/212/245/345/221/212.md +169 -0
  68. package/docs/WebSocket/351/207/215/350/277/236/351/200/200/351/201/277/346/234/272/345/210/266/346/226/271/346/241/210.md +394 -0
  69. package/docs/Web/346/250/241/345/235/227/344/270/216Evol/346/250/241/345/235/227/351/207/215/346/236/204/345/210/206/346/236/220.md +521 -0
  70. package/docs/audit-api-guide.md +68 -0
  71. package/docs/audit-module-design.md +315 -0
  72. package/docs/audit-module-implementation-summary.md +149 -0
  73. package/docs/llm-context-design.md +52 -0
  74. package/docs/llm-test-enhancement-plan.md +970 -0
  75. package/docs/logs-api-guide.md +42 -0
  76. package/docs/npm/345/214/205Python/347/216/257/345/242/203/347/256/241/347/220/206/346/226/271/346/241/210.md +302 -0
  77. package/docs/npm/345/217/221/345/270/203/344/270/216CLI/344/275/277/347/224/250/346/214/207/345/215/227.md +245 -0
  78. package/docs/stdio/344/270/216/347/253/257/345/217/243/345/217/221/347/216/260/351/207/215/346/236/204.md +480 -0
  79. package/docs/web/346/250/241/345/235/227/344/270/255/350/275/254/346/234/215/345/212/241/350/256/276/350/256/241/346/226/271/346/241/210.md +449 -0
  80. package/docs//344/272/213/344/273/266/345/244/204/347/220/206/346/234/272/345/210/266.md +388 -0
  81. package/docs//344/272/213/344/273/266/345/244/204/347/220/206/350/247/204/350/214/203.md +113 -0
  82. package/docs//344/272/213/344/273/266/350/256/242/351/230/205/351/200/232/351/205/215/347/254/246/350/247/204/350/214/203.md +256 -0
  83. package/docs//344/272/213/344/273/266/351/230/237/345/210/227/345/274/271/346/200/247/347/256/241/347/220/206.md +449 -0
  84. package/docs//344/272/244/344/272/222/345/274/217/347/273/210/347/253/257/346/216/247/345/210/266/346/226/271/346/241/210.md +301 -0
  85. package/docs//344/273/243/347/220/206/345/220/257/345/212/250/345/231/250/344/270/216/345/256/271/345/231/250/345/214/226.md +140 -0
  86. package/docs//344/273/243/347/240/201/347/273/237/350/256/241/345/267/245/345/205/267/344/275/277/347/224/250/350/257/264/346/230/216.md +217 -0
  87. package/docs//344/274/230/351/233/205/351/200/200/345/207/272/350/247/204/350/214/203.md +362 -0
  88. package/docs//344/276/235/350/265/226/347/256/241/347/220/206/350/257/264/346/230/216.md +141 -0
  89. package/docs//344/277/256/345/244/215/346/235/203/351/231/220/351/227/256/351/242/230-evol-RPC/346/235/203/351/231/220.md +268 -0
  90. package/docs//345/210/240/351/231/244kernel-client-example/345/256/214/346/210/220.md +309 -0
  91. package/docs//345/210/240/351/231/244ws-management/345/256/214/346/210/220.md +418 -0
  92. package/docs//345/220/257/345/212/250/344/274/230/345/214/226/346/226/271/346/241/210.md +522 -0
  93. package/docs//345/220/257/345/212/250/344/276/235/350/265/226/344/270/216/346/216/222/345/272/217.md +105 -0
  94. package/docs//345/256/211/350/243/205/350/204/232/346/234/254/345/274/200/345/217/221/346/226/207/346/241/243.md +643 -0
  95. package/docs//345/256/214/346/225/264/345/220/257/345/212/250/346/265/201/347/250/213/350/256/276/350/256/241.md +452 -0
  96. package/docs//345/256/236/347/216/260/350/247/204/345/210/222.md +195 -0
  97. package/docs//345/277/203/350/267/263/346/234/272/345/210/266/351/207/215/346/236/204/346/200/273/347/273/223.md +166 -0
  98. package/docs//346/217/241/346/211/213/350/256/244/350/257/201/346/226/271/346/241/210-/345/256/211/345/205/250/345/256/241/346/237/245.md +176 -0
  99. package/docs//346/217/241/346/211/213/350/256/244/350/257/201/346/226/271/346/241/210.md +908 -0
  100. package/docs//346/226/207/346/241/243/346/233/264/346/226/260/346/270/205/345/215/225.md +83 -0
  101. package/docs//346/227/245/345/277/227/344/270/216/345/274/202/345/270/270/345/244/204/347/220/206/350/247/204/350/214/203.md +829 -0
  102. package/docs//346/227/245/345/277/227/350/260/203/350/257/225/345/256/236/346/210/230/346/214/207/345/215/227.md +25 -0
  103. package/docs//346/236/266/346/236/204/345/200/237/351/211/264/346/214/207/345/215/227.md +977 -0
  104. package/docs//346/236/266/346/236/204/346/224/271/351/200/240-/345/256/214/346/210/220/346/200/273/347/273/223.md +440 -0
  105. package/docs//346/236/266/346/236/204/347/216/260/347/212/266/344/270/216/347/273/210/346/236/201/347/233/256/346/240/207/345/257/271/346/257/224/345/210/206/346/236/220.md +508 -0
  106. package/docs//346/250/241/345/235/227/345/244/232/350/277/236/346/216/245/346/216/247/345/210/266/347/255/226/347/225/245.md +220 -0
  107. package/docs//346/250/241/345/235/227/345/256/211/350/243/205/346/234/272/345/210/266/350/256/276/350/256/241.md +500 -0
  108. package/docs//346/250/241/345/235/227/345/274/200/345/217/221/346/214/207/345/215/227.md +1824 -0
  109. package/docs//346/250/241/345/235/227/347/203/255/346/233/264/346/226/260.md +89 -0
  110. package/docs//346/250/241/345/235/227/350/277/234/347/250/213/351/203/250/347/275/262/345/274/200/345/217/221/350/247/204/350/214/203.md +460 -0
  111. package/docs//346/250/241/345/235/227/351/200/200/345/207/272/346/234/272/345/210/266/345/256/214/346/225/264/346/226/271/346/241/210.md +303 -0
  112. package/docs//346/250/241/345/235/227/351/205/215/347/275/256/345/212/240/350/275/275/344/270/216/347/203/255/351/207/215/350/275/275/350/247/204/350/214/203.md +369 -0
  113. package/docs//346/265/213/350/257/225/344/270/255/345/277/203/346/267/273/345/212/240/346/250/241/345/235/227/346/265/213/350/257/225/346/214/207/345/215/227.md +147 -0
  114. package/docs//347/211/210/346/234/254/351/224/201/345/256/232/347/216/257/345/242/203/347/256/241/347/220/206/346/226/271/346/241/210.md +331 -0
  115. package/docs//347/216/257/345/242/203/345/217/230/351/207/217/344/270/216/350/277/220/350/241/214/346/227/266/347/233/256/345/275/225/350/256/276/350/256/241.md +499 -0
  116. package/docs//347/216/257/345/242/203/347/256/241/347/220/206/345/256/214/346/225/264/346/226/271/346/241/210.md +334 -0
  117. package/docs//350/231/232/346/213/237/346/250/241/345/235/227/344/270/255/350/275/254/346/234/215/345/212/241/345/256/214/346/225/264/350/256/276/350/256/241.md +1496 -0
  118. package/docs//350/231/232/346/213/237/347/216/257/345/242/203/345/267/245/344/275/234/345/216/237/347/220/206.md +163 -0
  119. package/docs//350/256/241/345/210/222/347/256/241/347/220/206/345/231/250/344/275/277/347/224/250/346/214/207/345/215/227.md +196 -0
  120. package/docs//350/256/244/350/257/201/346/250/241/345/235/227/344/270/216Gateway/350/256/276/350/256/241/346/226/271/346/241/210.md +765 -0
  121. package/docs//350/277/234/347/250/213/346/250/241/345/235/227/350/256/276/350/256/241-/346/227/247/347/211/210.md +1117 -0
  122. package/docs//350/277/234/347/250/213/346/250/241/345/235/227/350/256/276/350/256/241.md +451 -0
  123. package/docs//351/207/215/346/236/204/346/234/272/345/210/266/346/270/205/345/215/225.md +192 -0
  124. package/docs//351/223/276/350/267/257/350/277/275/350/270/252/346/226/271/346/241/210.md +242 -0
  125. package/docs//351/231/215/347/272/247/347/255/226/347/225/245/350/256/276/350/256/241/346/226/271/346/241/210.md +618 -0
  126. package/extensions/agents/assistant/entry.py +113 -14
  127. package/extensions/agents/assistant/module.md +27 -22
  128. package/extensions/agents/assistant/server.py +291 -105
  129. package/extensions/channels/acp_channel/entry.py +114 -16
  130. package/extensions/channels/acp_channel/module.md +4 -0
  131. package/extensions/channels/acp_channel/server.py +396 -105
  132. package/extensions/channels/phone_channel/__init__.py +1 -0
  133. package/extensions/channels/phone_channel/entry.py +503 -0
  134. package/extensions/channels/phone_channel/module.md +31 -0
  135. package/extensions/channels/phone_channel/server.py +686 -0
  136. package/extensions/event_hub_bench/entry.py +55 -12
  137. package/extensions/event_hub_bench/module.md +27 -27
  138. package/extensions/services/audit/README.md +134 -0
  139. package/extensions/services/audit/collector.py +73 -0
  140. package/extensions/services/audit/entry.py +444 -0
  141. package/extensions/services/audit/module.md +66 -0
  142. package/extensions/services/audit/query_audit.py +111 -0
  143. package/extensions/services/audit/routes/__init__.py +1 -0
  144. package/extensions/services/audit/routes/routes_audit.py +113 -0
  145. package/extensions/services/audit/schemas/__init__.py +5 -0
  146. package/extensions/services/audit/schemas/audit_event.py +92 -0
  147. package/extensions/services/audit/server.py +542 -0
  148. package/extensions/services/audit/storage.py +95 -0
  149. package/extensions/services/auth/entry.py +1054 -0
  150. package/extensions/services/auth/module.md +31 -0
  151. package/extensions/services/auth/token_store.py +185 -0
  152. package/extensions/services/auth/verifiers/evol_account.py +101 -0
  153. package/extensions/services/auth/verifiers/kite_token.py +38 -0
  154. package/extensions/services/auth/verifiers/pairing_code.py +71 -0
  155. package/extensions/services/backup/entry.py +494 -197
  156. package/extensions/services/backup/module.md +4 -2
  157. package/extensions/services/dataclaw/api/__init__.py +0 -0
  158. package/extensions/services/dataclaw/api/admin.py +367 -0
  159. package/extensions/services/dataclaw/api/copyright.py +175 -0
  160. package/extensions/services/dataclaw/api/credits.py +177 -0
  161. package/extensions/services/dataclaw/api/data.py +179 -0
  162. package/extensions/services/dataclaw/api/demands.py +269 -0
  163. package/extensions/services/dataclaw/api/feeds.py +262 -0
  164. package/extensions/services/dataclaw/api/identity.py +505 -0
  165. package/extensions/services/dataclaw/api/notifications.py +104 -0
  166. package/extensions/services/dataclaw/api/reviews.py +138 -0
  167. package/extensions/services/dataclaw/api/search.py +153 -0
  168. package/extensions/services/dataclaw/api/subscriptions.py +157 -0
  169. package/extensions/services/dataclaw/config.json5 +96 -0
  170. package/extensions/services/dataclaw/core/__init__.py +0 -0
  171. package/extensions/services/dataclaw/core/auth.py +95 -0
  172. package/extensions/services/dataclaw/core/config.py +50 -0
  173. package/extensions/services/dataclaw/core/database.py +70 -0
  174. package/extensions/services/dataclaw/entry.py +416 -0
  175. package/extensions/services/dataclaw/gofeed/351/241/271/347/233/256/346/211/200/346/234/211/346/235/203/350/275/254/347/247/273/346/265/201/347/250/213/350/257/264/346/230/216.md +309 -0
  176. package/extensions/services/dataclaw/migrate.py +283 -0
  177. package/extensions/services/dataclaw/models/__init__.py +0 -0
  178. package/extensions/services/dataclaw/module.md +49 -0
  179. package/extensions/services/dataclaw/requirements.txt +18 -0
  180. package/extensions/services/dataclaw/server.py +759 -0
  181. package/extensions/services/dataclaw/services/__init__.py +0 -0
  182. package/extensions/services/dataclaw/services/agent_service.py +132 -0
  183. package/extensions/services/dataclaw/services/credit_service.py +235 -0
  184. package/extensions/services/dataclaw/services/email_service.py +140 -0
  185. package/extensions/services/dataclaw/services/feed_service.py +259 -0
  186. package/extensions/services/dataclaw/services/notification_service.py +209 -0
  187. package/extensions/services/dataclaw/services/oauth_service.py +275 -0
  188. package/extensions/services/dataclaw/services/pricing.py +102 -0
  189. package/extensions/services/dataclaw/services/quality.py +79 -0
  190. package/extensions/services/dataclaw/services/reputation.py +142 -0
  191. package/extensions/services/dataclaw/services/sms_service.py +174 -0
  192. package/extensions/services/dataclaw/static/css/common.css +853 -0
  193. package/extensions/services/dataclaw/static/css/themes/blue.css +42 -0
  194. package/extensions/services/dataclaw/static/css/themes/dark.css +42 -0
  195. package/extensions/services/dataclaw/static/css/themes/light.css +35 -0
  196. package/extensions/services/dataclaw/static/js/api.js +103 -0
  197. package/extensions/services/dataclaw/static/js/common.js +321 -0
  198. package/extensions/services/dataclaw/static/js/i18n.js +95 -0
  199. package/extensions/services/dataclaw/static/js/pages/admin.js +152 -0
  200. package/extensions/services/dataclaw/static/js/pages/dashboard.js +82 -0
  201. package/extensions/services/dataclaw/static/js/pages/feed-detail.js +180 -0
  202. package/extensions/services/dataclaw/static/js/pages/feed-manage.js +158 -0
  203. package/extensions/services/dataclaw/static/js/theme.js +46 -0
  204. package/extensions/services/dataclaw/static/locales/en-US.json +464 -0
  205. package/extensions/services/dataclaw/static/locales/ja-JP.json +464 -0
  206. package/extensions/services/dataclaw/static/locales/zh-CN.json +464 -0
  207. package/extensions/services/dataclaw/templates/admin/index.html +90 -0
  208. package/extensions/services/dataclaw/templates/base.html +136 -0
  209. package/extensions/services/dataclaw/templates/credits/balance.html +106 -0
  210. package/extensions/services/dataclaw/templates/credits/deposit.html +164 -0
  211. package/extensions/services/dataclaw/templates/credits/history.html +90 -0
  212. package/extensions/services/dataclaw/templates/dashboard.html +52 -0
  213. package/extensions/services/dataclaw/templates/demands/create.html +78 -0
  214. package/extensions/services/dataclaw/templates/demands/detail.html +136 -0
  215. package/extensions/services/dataclaw/templates/demands/list.html +94 -0
  216. package/extensions/services/dataclaw/templates/feeds/create.html +95 -0
  217. package/extensions/services/dataclaw/templates/feeds/detail.html +110 -0
  218. package/extensions/services/dataclaw/templates/feeds/list.html +110 -0
  219. package/extensions/services/dataclaw/templates/feeds/manage.html +88 -0
  220. package/extensions/services/dataclaw/templates/index.html +185 -0
  221. package/extensions/services/dataclaw/templates/login.html +246 -0
  222. package/extensions/services/dataclaw/templates/register.html +164 -0
  223. package/extensions/services/dataclaw/templates/settings/notifications.html +96 -0
  224. package/extensions/services/dataclaw/templates/settings/profile.html +167 -0
  225. package/extensions/services/dataclaw/templates/subscriptions/list.html +64 -0
  226. package/extensions/services/dataclaw/tests/__init__.py +0 -0
  227. package/extensions/services/dataclaw/tests/conftest.py +68 -0
  228. package/extensions/services/dataclaw/tests/integration/__init__.py +0 -0
  229. package/extensions/services/dataclaw/tests/integration/test_workflows.py +239 -0
  230. package/extensions/services/dataclaw/tests/unit/__init__.py +0 -0
  231. package/extensions/services/dataclaw/tests/unit/test_admin.py +70 -0
  232. package/extensions/services/dataclaw/tests/unit/test_copyright.py +63 -0
  233. package/extensions/services/dataclaw/tests/unit/test_credits.py +80 -0
  234. package/extensions/services/dataclaw/tests/unit/test_data.py +98 -0
  235. package/extensions/services/dataclaw/tests/unit/test_demands.py +106 -0
  236. package/extensions/services/dataclaw/tests/unit/test_feeds.py +98 -0
  237. package/extensions/services/dataclaw/tests/unit/test_identity.py +88 -0
  238. package/extensions/services/dataclaw/tests/unit/test_notifications.py +36 -0
  239. package/extensions/services/dataclaw/tests/unit/test_reviews.py +68 -0
  240. package/extensions/services/dataclaw/tests/unit/test_search.py +64 -0
  241. package/extensions/services/dataclaw/tests/unit/test_subscriptions.py +65 -0
  242. package/extensions/services/dataclaw/tests/unit/test_system.py +106 -0
  243. package/extensions/services/dataclaw/utils/__init__.py +0 -0
  244. package/extensions/services/dataclaw/utils/crypto.py +38 -0
  245. package/extensions/services/dataclaw/utils/id_generator.py +52 -0
  246. package/extensions/services/dataclaw/ws/__init__.py +0 -0
  247. package/extensions/services/dataclaw/ws/handler.py +163 -0
  248. package/extensions/services/dataclaw//345/215/217/350/256/2561-/351/241/271/347/233/256/346/235/241/344/273/266/346/216/210/346/235/203/344/270/216/350/202/241/346/235/203/345/257/271/344/273/267/345/215/217/350/256/256.md +243 -0
  249. package/extensions/services/dataclaw//345/215/217/350/256/2562-/351/241/271/347/233/256/350/264/255/344/271/260/346/235/203/344/270/216/345/244/226/345/214/205/345/247/224/346/211/230/345/274/200/345/217/221/345/215/217/350/256/256.md +434 -0
  250. package/extensions/services/evol/__init__.py +1 -0
  251. package/extensions/services/evol/async_http.py +551 -0
  252. package/extensions/services/evol/auth_manager.py +602 -443
  253. package/extensions/services/evol/config.json5 +16 -0
  254. package/extensions/services/evol/entry.py +568 -406
  255. package/extensions/services/evol/evol_api.py +969 -173
  256. package/extensions/services/evol/mfa_totp.py +77 -0
  257. package/extensions/services/evol/module.md +150 -32
  258. package/extensions/services/evol/nonce_pool.py +113 -0
  259. package/extensions/services/evol/oauth_manager.py +223 -0
  260. package/extensions/services/evol/pairing.py +3 -2
  261. package/extensions/services/evol/pairing_codes.jsonl +1 -0
  262. package/extensions/services/evol/relay.py +1031 -682
  263. package/extensions/services/evol/relay_config.json5 +85 -67
  264. package/extensions/services/evol/routes/routes_llm.py +231 -0
  265. package/extensions/services/evol/routes/routes_rpc.py +90 -89
  266. package/extensions/services/evol/routes/routes_test.py +11 -4
  267. package/extensions/services/evol/server.py +2426 -875
  268. package/extensions/services/evol/static/assets/CommissionView-Cs_ys6Gm.js +1 -0
  269. package/extensions/services/evol/static/assets/CommissionView-DACet_Oo.css +1 -0
  270. package/extensions/services/evol/static/assets/IframePage-DbO11U9G.js +1 -0
  271. package/extensions/services/evol/static/assets/IframePage-c572lT8i.css +1 -0
  272. package/extensions/services/evol/static/assets/TeamDetailView-DULrGD7k.css +1 -0
  273. package/extensions/services/evol/static/assets/TeamDetailView-gy_MBEqG.js +139 -0
  274. package/extensions/services/evol/static/assets/element-plus-Bd7pZkkM.js +63 -0
  275. package/extensions/services/evol/static/assets/index-CmMONKzG.css +1 -0
  276. package/extensions/services/evol/static/assets/index-D44bBe__.js +2 -0
  277. package/extensions/services/evol/static/assets/vue-vendor-DtF-__I4.js +29 -0
  278. package/extensions/services/evol/static/index.html +16 -781
  279. package/extensions/services/evol/static/logo.png +0 -0
  280. package/extensions/services/evol/stats_manager.py +243 -240
  281. package/extensions/services/evol/web/README.md +89 -0
  282. package/extensions/services/evol/web/build.bat +44 -0
  283. package/extensions/services/evol/web/index.html +13 -0
  284. package/extensions/services/evol/web/package-lock.json +1718 -0
  285. package/extensions/services/evol/web/package.json +26 -0
  286. package/extensions/services/evol/web/public/logo.png +0 -0
  287. package/extensions/services/evol/web/src/App.vue +7 -0
  288. package/extensions/services/evol/web/src/components/layout/AppHeader.vue +202 -0
  289. package/extensions/services/evol/web/src/components/layout/AppLayout.vue +61 -0
  290. package/extensions/services/evol/web/src/components/layout/AppSidebar.vue +115 -0
  291. package/extensions/services/evol/web/src/components/login/LoginPage.vue +271 -0
  292. package/extensions/services/evol/web/src/components/team/AddMemberModal.vue +181 -0
  293. package/extensions/services/evol/web/src/components/team/GroupTreeNode.vue +156 -0
  294. package/extensions/services/evol/web/src/components/team/TeamAlertConfig.vue +221 -0
  295. package/extensions/services/evol/web/src/components/team/TeamBillModal.vue +165 -0
  296. package/extensions/services/evol/web/src/components/team/TeamMembersAndGroups.vue +499 -0
  297. package/extensions/services/evol/web/src/components/team/TeamStatsPanel.vue +907 -0
  298. package/extensions/services/evol/web/src/components/team/TreeNode.vue +331 -0
  299. package/extensions/services/evol/web/src/components/team/stats/StatsExportProgress.vue +44 -0
  300. package/extensions/services/evol/web/src/components/team/stats/StatsHeader.vue +89 -0
  301. package/extensions/services/evol/web/src/components/team/stats/StatsMemberDetail.vue +415 -0
  302. package/extensions/services/evol/web/src/components/team/stats/StatsSummary.vue +42 -0
  303. package/extensions/services/evol/web/src/components/team/stats/helpers.ts +195 -0
  304. package/extensions/services/evol/web/src/components/team/stats/stats.css +741 -0
  305. package/extensions/services/evol/web/src/components/team/stats/useStatsApi.ts +114 -0
  306. package/extensions/services/evol/web/src/components/team/stats/useStatsCharts.ts +242 -0
  307. package/extensions/services/evol/web/src/components/team/stats/useStatsExport.ts +232 -0
  308. package/extensions/services/evol/web/src/composables/useFormatters.ts +42 -0
  309. package/extensions/services/evol/web/src/composables/useTheme.ts +52 -0
  310. package/extensions/services/evol/web/src/env.d.ts +7 -0
  311. package/extensions/services/evol/web/src/i18n/en.ts +361 -0
  312. package/extensions/services/evol/web/src/i18n/index.ts +36 -0
  313. package/extensions/services/evol/web/src/i18n/zh.ts +379 -0
  314. package/extensions/services/evol/web/src/main.ts +21 -0
  315. package/extensions/services/evol/web/src/router/index.ts +81 -0
  316. package/extensions/services/evol/web/src/services/kernel-client.ts +406 -0
  317. package/extensions/services/evol/web/src/stores/auth.ts +189 -0
  318. package/extensions/services/evol/web/src/stores/connection.ts +134 -0
  319. package/extensions/services/evol/web/src/stores/pages.ts +79 -0
  320. package/extensions/services/evol/web/src/styles/base.css +213 -0
  321. package/extensions/services/evol/web/src/styles/variables.css +138 -0
  322. package/extensions/services/evol/web/src/types/rpc.ts +35 -0
  323. package/extensions/services/evol/web/src/types/token.ts +87 -0
  324. package/extensions/services/evol/web/src/views/AccountView.vue +1532 -0
  325. package/extensions/services/evol/web/src/views/AiServiceView.vue +219 -0
  326. package/extensions/services/evol/web/src/views/CommissionView.vue +1220 -0
  327. package/extensions/services/evol/web/src/views/CreditsView.vue +131 -0
  328. package/extensions/services/evol/web/src/views/EndpointView.vue +163 -0
  329. package/extensions/services/evol/web/src/views/IframePage.vue +120 -0
  330. package/extensions/services/evol/web/src/views/TeamDetailView.vue +473 -0
  331. package/extensions/services/evol/web/src/views/TeamView.vue +332 -0
  332. package/extensions/services/evol/web/tsconfig.json +31 -0
  333. package/extensions/services/evol/web/tsconfig.node.json +10 -0
  334. package/extensions/services/evol/web/vite.config.ts +49 -0
  335. package/extensions/services/evolmem/__init__.py +0 -0
  336. package/extensions/services/evolmem/entry.py +387 -0
  337. package/extensions/services/evolmem/hooks/__init__.py +0 -0
  338. package/extensions/services/evolmem/hooks/assistant_stop.py +228 -0
  339. package/extensions/services/evolmem/hooks/common.py +76 -0
  340. package/extensions/services/evolmem/hooks/pre_tool_use.py +56 -0
  341. package/extensions/services/evolmem/hooks/session_end.py +133 -0
  342. package/extensions/services/evolmem/hooks/session_start.py +229 -0
  343. package/extensions/services/evolmem/hooks/user_prompt.py +122 -0
  344. package/extensions/services/evolmem/module.md +48 -0
  345. package/extensions/services/evolmem/prompts/00-server-info.md +28 -0
  346. package/extensions/services/evolmem/prompts/01-behavior.md +46 -0
  347. package/extensions/services/evolmem/prompts/02-summary-format.md +112 -0
  348. package/extensions/services/evolmem/prompts/03-file-query.md +92 -0
  349. package/extensions/services/evolmem/prompts/04-topic-stats.md +11 -0
  350. package/extensions/services/evolmem/prompts/05-recent-topics.md +84 -0
  351. package/extensions/services/evolmem/scripts/__init__.py +0 -0
  352. package/extensions/services/evolmem/scripts/extract_keywords.py +40 -0
  353. package/extensions/services/evolmem/scripts/search_topics.py +91 -0
  354. package/extensions/services/evolmem/server.py +641 -0
  355. package/extensions/services/gateway/entry.py +964 -0
  356. package/extensions/services/gateway/module.md +29 -0
  357. package/extensions/services/gateway/nonce_pool.py +65 -0
  358. package/extensions/services/gateway/relay.py +133 -0
  359. package/extensions/services/gateway/ws_server.py +285 -0
  360. package/extensions/services/kite_console/auth_manager.py +603 -0
  361. package/extensions/services/kite_console/config.json5 +19 -0
  362. package/extensions/services/kite_console/config_loader.py +117 -0
  363. package/extensions/services/kite_console/entry.py +528 -0
  364. package/extensions/services/kite_console/evol_api.py +179 -0
  365. package/extensions/services/kite_console/evol_config.json5 +29 -0
  366. package/extensions/services/kite_console/mfa_totp.py +77 -0
  367. package/extensions/services/kite_console/migrate_tokens.py +122 -0
  368. package/extensions/services/kite_console/module.md +37 -0
  369. package/extensions/services/kite_console/nonce_pool.py +113 -0
  370. package/extensions/services/kite_console/oauth_manager.py +223 -0
  371. package/extensions/services/kite_console/pairing.py +280 -0
  372. package/extensions/services/kite_console/pairing_codes.jsonl +2 -0
  373. package/extensions/services/kite_console/relay.py +1350 -0
  374. package/extensions/services/kite_console/relay_config.json5 +96 -0
  375. package/extensions/services/kite_console/routes/__init__.py +1 -0
  376. package/extensions/services/kite_console/routes/routes_llm.py +231 -0
  377. package/extensions/services/kite_console/routes/routes_proxy.py +115 -0
  378. package/extensions/services/kite_console/routes/routes_rpc.py +89 -0
  379. package/extensions/services/kite_console/routes/routes_test.py +68 -0
  380. package/extensions/services/kite_console/server.py +1742 -0
  381. package/extensions/services/{evol → kite_console}/static/css/style.css +656 -2
  382. package/extensions/services/kite_console/static/index.html +1524 -0
  383. package/extensions/services/{evol → kite_console}/static/js/dialog.js +11 -4
  384. package/extensions/services/kite_console/static/js/evol-app.js +7740 -0
  385. package/extensions/services/{evol/static/js/evol-app.js → kite_console/static/js/evol-app.js.backup} +2777 -1949
  386. package/extensions/services/kite_console/static/js/kernel-client.js +560 -0
  387. package/extensions/services/{evol/static/js/kernel-client.js → kite_console/static/js/kernel-client.js.backup} +41 -3
  388. package/extensions/services/{evol → kite_console}/static/js/registry-tests.js +7 -0
  389. package/extensions/services/kite_console/static/js/tests/ARCHITECTURE.md +67 -0
  390. package/extensions/services/kite_console/static/js/tests/README.md +140 -0
  391. package/extensions/services/kite_console/static/js/tests/index.js +161 -0
  392. package/extensions/services/kite_console/static/js/tests/integration/auth.js +120 -0
  393. package/extensions/services/kite_console/static/js/tests/integration/channel-interaction.js +188 -0
  394. package/extensions/services/kite_console/static/js/tests/integration/elastic-connection.js +115 -0
  395. package/extensions/services/kite_console/static/js/tests/integration/full-workflow.js +43 -0
  396. package/extensions/services/kite_console/static/js/tests/integration/multi-instance.js +304 -0
  397. package/extensions/services/kite_console/static/js/tests/integration/nested-rpc.js +266 -0
  398. package/extensions/services/kite_console/static/js/tests/integration/pingpong.js +25 -0
  399. package/extensions/services/kite_console/static/js/tests/integration/redis.js +227 -0
  400. package/extensions/services/kite_console/static/js/tests/integration/registry-core.js +52 -0
  401. package/extensions/services/kite_console/static/js/tests/integration/remote-deploy.js +85 -0
  402. package/extensions/services/kite_console/static/js/tests/integration/require-init.js +96 -0
  403. package/extensions/services/kite_console/static/js/tests/integration/scaling-control.js +193 -0
  404. package/extensions/services/kite_console/static/js/tests/integration/trace.js +109 -0
  405. package/extensions/services/kite_console/static/js/tests/modules/acp_channel.js +339 -0
  406. package/extensions/services/kite_console/static/js/tests/modules/auth.js +96 -0
  407. package/extensions/services/kite_console/static/js/tests/modules/backup.js +49 -0
  408. package/extensions/services/kite_console/static/js/tests/modules/gateway.js +41 -0
  409. package/extensions/services/kite_console/static/js/tests/modules/kernel.js +90 -0
  410. package/extensions/services/kite_console/static/js/tests/modules/launcher.js +75 -0
  411. package/extensions/services/kite_console/static/js/tests/modules/multi_instance.js +129 -0
  412. package/extensions/services/kite_console/static/js/tests/modules/phone_channel.js +364 -0
  413. package/extensions/services/kite_console/static/js/tests/modules/redis.js +178 -0
  414. package/extensions/services/kite_console/static/js/tests/modules/watchdog.js +60 -0
  415. package/extensions/services/kite_console/static/js/tests/modules/web.js +70 -0
  416. package/extensions/services/kite_console/static/js/tests/test-runner.js +123 -0
  417. package/extensions/services/kite_console/static/js/virtual-list.js +200 -0
  418. package/extensions/services/kite_console/static/test_kernel_client_token.html +352 -0
  419. package/extensions/services/kite_console/stats_manager.py +247 -0
  420. package/extensions/services/logs/README.md +215 -0
  421. package/extensions/services/logs/api_logger.py +37 -0
  422. package/extensions/services/logs/baseline.py +121 -0
  423. package/extensions/services/logs/cleaner.py +76 -0
  424. package/extensions/services/logs/entry.py +449 -0
  425. package/extensions/services/logs/formatter.py +129 -0
  426. package/extensions/services/logs/module.md +38 -0
  427. package/extensions/services/logs/quick_diagnostic.py +128 -0
  428. package/extensions/services/logs/routes/__init__.py +1 -0
  429. package/extensions/services/logs/routes/routes_logs.py +218 -0
  430. package/extensions/services/logs/routes/routes_logs.py.backup +173 -0
  431. package/extensions/services/logs/scanner.py +100 -0
  432. package/extensions/services/logs/searcher.py +263 -0
  433. package/extensions/services/logs/server.py +553 -0
  434. package/extensions/services/logs.zip +0 -0
  435. package/extensions/services/model_service/config.json5 +30 -0
  436. package/extensions/services/model_service/entry.py +620 -171
  437. package/extensions/services/model_service/module.md +11 -2
  438. package/extensions/services/proxy/__init__.py +0 -0
  439. package/extensions/services/proxy/aid_manager.py +419 -0
  440. package/extensions/services/proxy/auth_bridge.py +182 -0
  441. package/extensions/services/proxy/config_store.py +79 -0
  442. package/extensions/services/proxy/entry.py +528 -0
  443. package/extensions/services/proxy/evol/presenter/agentIdPresenter.py +2 -2
  444. package/extensions/services/proxy/evol/presenter/apikeyPresenter.py +18 -28
  445. package/extensions/services/proxy/evol/presenter/configPresenter.py +80 -1127
  446. package/extensions/services/proxy/evol/presenter/userPresenter.py +71 -477
  447. package/extensions/services/proxy/evol/server/claude_proxy_async.py +11 -7
  448. package/extensions/services/proxy/module.md +151 -0
  449. package/extensions/services/proxy/server.py +952 -271
  450. package/extensions/services/redis/ALIGNMENT_CHECKLIST.md +121 -0
  451. package/extensions/services/redis/ALIGNMENT_STATUS.md +548 -0
  452. package/extensions/services/redis/config.json5 +8 -0
  453. package/extensions/services/redis/entry.py +1509 -0
  454. package/extensions/services/redis/entry.py.backup +405 -0
  455. package/extensions/services/redis/module.md +48 -0
  456. package/extensions/services/redis/redis_builtin.py +332 -0
  457. package/extensions/services/redis/redis_external.py +164 -0
  458. package/extensions/services/testUi/entry.py +446 -0
  459. package/extensions/services/testUi/module.md +18 -0
  460. package/extensions/services/testUi/ui/cards.html +131 -0
  461. package/extensions/services/testUi/ui/index.html +22 -0
  462. package/extensions/services/testUi/ui/particles.html +143 -0
  463. package/extensions/services/watchdog/entry.py +1258 -793
  464. package/extensions/services/watchdog/module.md +2 -0
  465. package/extensions/services/watchdog/monitor.py +465 -87
  466. package/extensions/services/web/auth_manager.py +602 -0
  467. package/extensions/services/web/config.json5 +11 -0
  468. package/extensions/services/web/entry.py +598 -478
  469. package/extensions/services/web/mfa_totp.py +77 -0
  470. package/extensions/services/web/module.md +16 -13
  471. package/extensions/services/web/nonce_pool.py +113 -0
  472. package/extensions/services/web/oauth_manager.py +223 -0
  473. package/extensions/services/web/pairing.py +3 -2
  474. package/extensions/services/web/pairing_codes.jsonl +1 -0
  475. package/extensions/services/web/relay.py +442 -63
  476. package/extensions/services/web/relay_config.json5 +1 -2
  477. package/extensions/services/web/routes/routes_rpc.py +6 -6
  478. package/extensions/services/web/server.py +360 -173
  479. package/extensions/services/web/static/index.html +1752 -1738
  480. package/extensions/services/web/static/js/app.js +32 -0
  481. package/extensions/services/web/static/js/kernel-client.js +48 -9
  482. package/extensions/services/web/vendor/bluetooth/audio.py +1 -1
  483. package/extensions/services/web/vendor/config.py +2 -2
  484. package/extensions/services/web/vendor/storage/identity.py +1 -1
  485. package/kernel/entry.py +77 -23
  486. package/kernel/event_hub.py +1122 -74
  487. package/kernel/module.md +2 -1
  488. package/kernel/registry_store.py +208 -11
  489. package/kernel/rpc_router.py +1400 -491
  490. package/kernel/server.py +1021 -134
  491. package/kite_cli/builders/__init__.py +4 -0
  492. package/kite_cli/builders/base.py +67 -0
  493. package/kite_cli/builders/custom.py +31 -0
  494. package/kite_cli/builders/detector.py +56 -0
  495. package/kite_cli/builders/go.py +34 -0
  496. package/kite_cli/builders/gradle.py +41 -0
  497. package/kite_cli/builders/maven.py +36 -0
  498. package/kite_cli/builders/npm.py +44 -0
  499. package/kite_cli/builders/python.py +37 -0
  500. package/kite_cli/commands/BUILD_GUIDE.md +109 -0
  501. package/kite_cli/commands/build.py +142 -0
  502. package/kite_cli/commands/check.py +60 -0
  503. package/kite_cli/commands/config.py +156 -0
  504. package/kite_cli/commands/deps.py +58 -0
  505. package/kite_cli/commands/deps_install.py +7 -7
  506. package/kite_cli/commands/disable.py +162 -0
  507. package/kite_cli/commands/enable.py +162 -0
  508. package/kite_cli/commands/export.py +96 -0
  509. package/kite_cli/commands/import_cmd.py +110 -0
  510. package/kite_cli/commands/install.py +50 -23
  511. package/kite_cli/commands/install_skill.py +107 -0
  512. package/kite_cli/commands/list.py +128 -31
  513. package/kite_cli/commands/outdated.py +202 -0
  514. package/kite_cli/commands/search.py +33 -17
  515. package/kite_cli/commands/update.py +115 -2
  516. package/kite_cli/commands/venv_setup.py +6 -6
  517. package/kite_cli/commands/why.py +48 -0
  518. package/kite_cli/core/config_manager.py +145 -0
  519. package/kite_cli/core/downloader.py +32 -2
  520. package/kite_cli/main.py +151 -5
  521. package/kite_cli/utils/colors.py +153 -0
  522. package/kite_cli/utils/dependency_graph.py +209 -0
  523. package/kite_cli/utils/process.py +55 -0
  524. package/kite_cli/utils/progress.py +207 -0
  525. package/kite_cli/utils/table.py +101 -0
  526. package/launcher/count_lines.py +192 -43
  527. package/launcher/entry.py +4543 -2802
  528. package/launcher/logging_setup.py +54 -1
  529. package/launcher/module.md +32 -6
  530. package/launcher/module_scanner.py +93 -20
  531. package/launcher/process_manager.py +355 -76
  532. package/main.py +6 -0
  533. package/package.json +4 -1
  534. package/requirements.txt +41 -38
  535. package/scripts/auto-fix-deps.py +128 -0
  536. package/scripts/env-manager.js +25 -2
  537. package/scripts/final-test.js +78 -0
  538. package/scripts/setup-python-env.js +700 -191
  539. package/scripts/test-alluser.js +48 -0
  540. package/scripts/test-different-version.js +86 -0
  541. package/scripts/test-direct.js +63 -0
  542. package/scripts/test-extract-installer.js +28 -0
  543. package/scripts/test-install-log.js +54 -0
  544. package/scripts/test-installer.js +39 -0
  545. package/scripts/test-integration.js +250 -0
  546. package/scripts/test-real-install.js +210 -0
  547. package/scripts/test-targetdir.js +49 -0
  548. package/scripts/test-venv-real.js +47 -0
  549. package/scripts/test-venv-simple.js +57 -0
  550. package/scripts/test-wait.js +49 -0
  551. package/scripts/test-with-log.js +63 -0
  552. package/extensions/services/evol/config.yaml +0 -149
  553. package/extensions/services/evol/routes/routes_management_ws.py +0 -127
  554. package/extensions/services/evol/static/index_evol.html +0 -14
  555. package/extensions/services/evol/static/js/app.js +0 -6304
  556. package/extensions/services/evol/static/js/auth.js +0 -326
  557. package/extensions/services/evol/static/js/evol-app-fixed.js +0 -50
  558. package/extensions/services/evol/static/js/evol-app.js.bak +0 -1800
  559. package/extensions/services/evol/static/js/kernel-client-example.js +0 -228
  560. package/extensions/services/evol/static/js/main.js +0 -141
  561. package/extensions/services/evol/static/js/stats.js +0 -217
  562. package/extensions/services/evol/static/js/token-manager.js +0 -175
  563. package/extensions/services/proxy/CHANGELOG_20260308.md +0 -258
  564. package/extensions/services/proxy/_fix_prints.py +0 -133
  565. package/extensions/services/proxy/_fix_prints2.py +0 -87
  566. package/extensions/services/proxy/console_auth.py +0 -109
  567. package/extensions/services/proxy/logs/websocket.log +0 -260
  568. package/extensions/services/proxy/main.py +0 -240
  569. package/extensions/services/proxy/requirements.txt +0 -13
  570. package/extensions/services/web/config.yaml +0 -149
  571. /package/extensions/services/{evol → kite_console}/static/pairing.html +0 -0
  572. /package/extensions/services/{evol → kite_console}/static/test_registry.html +0 -0
  573. /package/extensions/services/{evol → kite_console}/static/test_relay.html +0 -0
@@ -1,793 +1,1258 @@
1
- """
2
- Watchdog entry point.
3
- Connects to Kernel via WebSocket JSON-RPC 2.0, registers, subscribes to events,
4
- runs health monitor loop, handles incoming RPC requests.
5
- """
6
-
7
- import asyncio
8
- import builtins
9
- import json
10
- import os
11
- import re
12
- import sys
13
- import threading
14
- import time
15
- import traceback
16
- import uuid
17
- from datetime import datetime, timezone
18
-
19
- import websockets
20
-
21
-
22
- # ── Module configuration ──
23
-
24
- def _load_module_config() -> dict:
25
- """Load module configuration from module.md frontmatter.
26
-
27
- Returns:
28
- Dict with keys: name, preferred_port, advertise_ip
29
-
30
- Raises:
31
- SystemExit: If module.md is invalid or name is non-compliant
32
- """
33
- _this_dir = os.path.dirname(os.path.abspath(__file__))
34
- module_md = os.path.join(_this_dir, "module.md")
35
-
36
- # Calculate relative path for error messages
37
- project_root = os.environ.get("KITE_PROJECT", "")
38
- if project_root and _this_dir.startswith(project_root):
39
- rel_path = os.path.relpath(_this_dir, project_root)
40
- else:
41
- rel_path = _this_dir
42
-
43
- # Default values (will be overridden if valid config exists)
44
- result = {
45
- "name": "",
46
- "preferred_port": 0,
47
- "advertise_ip": "0.0.0.0"
48
- }
49
-
50
- # Check if module.md exists
51
- if not os.path.exists(module_md):
52
- print(f"[{rel_path}] ERROR: Invalid module configuration in module.md")
53
- print(f" Path: {rel_path}/module.md")
54
- print(f" Reason: File not found")
55
- sys.exit(1)
56
-
57
- try:
58
- with open(module_md, encoding="utf-8") as f:
59
- text = f.read()
60
-
61
- # Extract YAML frontmatter (between --- markers)
62
- import re
63
- m = re.match(r'^---\s*\n(.*?)\n---', text, re.DOTALL)
64
- if not m:
65
- print(f"[{rel_path}] ERROR: Invalid module configuration in module.md")
66
- print(f" Path: {rel_path}/module.md")
67
- print(f" Reason: Missing YAML frontmatter")
68
- sys.exit(1)
69
-
70
- # Parse YAML frontmatter
71
- try:
72
- import yaml
73
- fm = yaml.safe_load(m.group(1)) or {}
74
- except ImportError:
75
- print(f"[{rel_path}] ERROR: PyYAML not installed, cannot parse module.md")
76
- sys.exit(1)
77
- except Exception as e:
78
- print(f"[{rel_path}] ERROR: Invalid module configuration in module.md")
79
- print(f" Path: {rel_path}/module.md")
80
- print(f" Reason: YAML parse error: {e}")
81
- sys.exit(1)
82
-
83
- # Validate 'name' field (required)
84
- if "name" not in fm:
85
- print(f"[{rel_path}] ERROR: Invalid module configuration in module.md")
86
- print(f" Path: {rel_path}/module.md")
87
- print(f" Reason: Missing 'name' field")
88
- sys.exit(1)
89
-
90
- raw_name = str(fm["name"]).strip()
91
-
92
- if not raw_name:
93
- print(f"[{rel_path}] ERROR: Invalid module configuration in module.md")
94
- print(f" Path: {rel_path}/module.md")
95
- print(f" Reason: Empty module name")
96
- sys.exit(1)
97
-
98
- # Validate name characters
99
- sanitized = re.sub(r'[^a-zA-Z0-9_\-]', '', raw_name)
100
-
101
- if sanitized != raw_name:
102
- invalid_chars = ''.join(sorted(set(c for c in raw_name if c not in sanitized)))
103
- print(f"[{rel_path}] ERROR: Invalid module configuration in module.md")
104
- print(f" Path: {rel_path}/module.md")
105
- print(f" Reason: Invalid characters in name '{raw_name}': {repr(invalid_chars)}")
106
- sys.exit(1)
107
-
108
- result["name"] = sanitized
109
-
110
- # Extract optional fields
111
- if "preferred_port" in fm:
112
- try:
113
- result["preferred_port"] = int(fm["preferred_port"])
114
- except (ValueError, TypeError):
115
- pass
116
-
117
- if "advertise_ip" in fm:
118
- result["advertise_ip"] = str(fm["advertise_ip"])
119
-
120
- except SystemExit:
121
- raise # Re-raise exit to prevent catching by outer except
122
- except Exception as e:
123
- print(f"[{rel_path}] ERROR: Failed to read module.md: {e}")
124
- sys.exit(1)
125
-
126
- return result
127
-
128
- _module_config = _load_module_config()
129
- MODULE_NAME = _module_config["name"]
130
-
131
-
132
- def _fmt_elapsed(t0: float) -> str:
133
- """Format elapsed time since t0: <1s → 'NNNms', >=1s → 'N.Ns', >=10s → 'NNs'."""
134
- d = time.monotonic() - t0
135
- if d < 1:
136
- return f"{d * 1000:.0f}ms"
137
- if d < 10:
138
- return f"{d:.1f}s"
139
- return f"{d:.0f}s"
140
-
141
-
142
- # ── Safe stdout/stderr: ignore BrokenPipeError after Launcher closes stdio ──
143
-
144
- class _SafeWriter:
145
- """Wraps a stream to silently swallow BrokenPipeError on write/flush."""
146
- def __init__(self, stream):
147
- self._stream = stream
148
-
149
- def write(self, s):
150
- try:
151
- self._stream.write(s)
152
- except (BrokenPipeError, OSError):
153
- pass
154
-
155
- def flush(self):
156
- try:
157
- self._stream.flush()
158
- except (BrokenPipeError, OSError):
159
- pass
160
-
161
- def __getattr__(self, name):
162
- return getattr(self._stream, name)
163
-
164
- sys.stdout = _SafeWriter(sys.stdout)
165
- sys.stderr = _SafeWriter(sys.stderr)
166
-
167
-
168
- # ── Timestamped print + log file writer ──
169
-
170
- _builtin_print = builtins.print
171
- _start_ts = time.monotonic()
172
- _last_ts = time.monotonic()
173
- _ANSI_RE = re.compile(r"\033\[[0-9;]*m")
174
- _log_lock = threading.Lock()
175
- _log_latest_path = None
176
- _log_daily_path = None
177
- _log_daily_date = ""
178
- _log_dir = None
179
- _crash_log_path = None
180
-
181
- def _strip_ansi(s: str) -> str:
182
- return _ANSI_RE.sub("", s)
183
-
184
- def _resolve_daily_log_path():
185
- """Resolve daily log path based on current date."""
186
- global _log_daily_path, _log_daily_date
187
- if not _log_dir:
188
- return
189
- today = datetime.now().strftime("%Y-%m-%d")
190
- if today == _log_daily_date and _log_daily_path:
191
- return
192
- month_dir = os.path.join(_log_dir, today[:7])
193
- os.makedirs(month_dir, exist_ok=True)
194
- _log_daily_path = os.path.join(month_dir, f"{today}.log")
195
- _log_daily_date = today
196
-
197
- def _write_log(plain_line: str):
198
- """Write a plain-text line to both latest.log and daily log."""
199
- with _log_lock:
200
- if _log_latest_path:
201
- try:
202
- with open(_log_latest_path, "a", encoding="utf-8") as f:
203
- f.write(plain_line)
204
- except Exception:
205
- pass
206
- _resolve_daily_log_path()
207
- if _log_daily_path:
208
- try:
209
- with open(_log_daily_path, "a", encoding="utf-8") as f:
210
- f.write(plain_line)
211
- except Exception:
212
- pass
213
-
214
-
215
- def _write_crash(exc_type, exc_value, exc_tb, thread_name=None, severity="critical", handled=False):
216
- """Write crash record to crashes.jsonl + daily crash archive."""
217
- record = {
218
- "timestamp": datetime.now(timezone.utc).isoformat(),
219
- "module": MODULE_NAME,
220
- "thread": thread_name or threading.current_thread().name,
221
- "exception_type": exc_type.__name__ if exc_type else "Unknown",
222
- "exception_message": str(exc_value),
223
- "traceback": "".join(traceback.format_exception(exc_type, exc_value, exc_tb)),
224
- "severity": severity,
225
- "handled": handled,
226
- "process_id": os.getpid(),
227
- "platform": sys.platform,
228
- "runtime_version": f"Python {sys.version.split()[0]}",
229
- }
230
-
231
- if exc_tb:
232
- tb_entries = traceback.extract_tb(exc_tb)
233
- if tb_entries:
234
- last = tb_entries[-1]
235
- record["context"] = {
236
- "function": last.name,
237
- "file": os.path.basename(last.filename),
238
- "line": last.lineno,
239
- }
240
-
241
- line = json.dumps(record, ensure_ascii=False) + "\n"
242
-
243
- if _crash_log_path:
244
- try:
245
- with open(_crash_log_path, "a", encoding="utf-8") as f:
246
- f.write(line)
247
- except Exception:
248
- pass
249
-
250
- if _log_dir:
251
- try:
252
- today = datetime.now().strftime("%Y-%m-%d")
253
- archive_dir = os.path.join(_log_dir, "crashes", today[:7])
254
- os.makedirs(archive_dir, exist_ok=True)
255
- archive_path = os.path.join(archive_dir, f"{today}.jsonl")
256
- with open(archive_path, "a", encoding="utf-8") as f:
257
- f.write(line)
258
- except Exception:
259
- pass
260
-
261
-
262
- def _print_crash_summary(exc_type, exc_tb, thread_name=None):
263
- """Print crash summary to console (red highlight)."""
264
- RED = "\033[91m"
265
- RESET = "\033[0m"
266
-
267
- if exc_tb:
268
- tb_entries = traceback.extract_tb(exc_tb)
269
- if tb_entries:
270
- last = tb_entries[-1]
271
- location = f"{os.path.basename(last.filename)}:{last.lineno}"
272
- else:
273
- location = "unknown"
274
- else:
275
- location = "unknown"
276
-
277
- prefix = f"[{MODULE_NAME}]"
278
- if thread_name:
279
- _builtin_print(f"{prefix} {RED}线程 {thread_name} 崩溃: "
280
- f"{exc_type.__name__} in {location}{RESET}")
281
- else:
282
- _builtin_print(f"{prefix} {RED}崩溃: {exc_type.__name__} in {location}{RESET}")
283
- if _crash_log_path:
284
- _builtin_print(f"{prefix} 崩溃日志: {_crash_log_path}")
285
-
286
- def _setup_exception_hooks():
287
- """Set up global exception hooks."""
288
- _orig_excepthook = sys.excepthook
289
-
290
- def _excepthook(exc_type, exc_value, exc_tb):
291
- _write_crash(exc_type, exc_value, exc_tb, severity="critical", handled=False)
292
- _print_crash_summary(exc_type, exc_tb)
293
- _orig_excepthook(exc_type, exc_value, exc_tb)
294
-
295
- sys.excepthook = _excepthook
296
-
297
- if hasattr(threading, "excepthook"):
298
- def _thread_excepthook(args):
299
- _write_crash(args.exc_type, args.exc_value, args.exc_traceback,
300
- thread_name=args.thread.name if args.thread else "unknown",
301
- severity="error", handled=False)
302
- _print_crash_summary(args.exc_type, args.exc_traceback,
303
- thread_name=args.thread.name if args.thread else None)
304
-
305
- threading.excepthook = _thread_excepthook
306
-
307
- def _tprint(*args, **kwargs):
308
- """Timestamped print that adds [timestamp] HH:MM:SS.mmm +delta prefix."""
309
- global _last_ts
310
- now = time.monotonic()
311
- elapsed = now - _start_ts
312
- delta = now - _last_ts
313
- _last_ts = now
314
-
315
- if elapsed < 1:
316
- elapsed_str = f"{elapsed * 1000:.0f}ms"
317
- elif elapsed < 100:
318
- elapsed_str = f"{elapsed:.1f}s"
319
- else:
320
- elapsed_str = f"{elapsed:.0f}s"
321
-
322
- if delta < 0.001:
323
- delta_str = ""
324
- elif delta < 1:
325
- delta_str = f"+{delta * 1000:.0f}ms"
326
- elif delta < 100:
327
- delta_str = f"+{delta:.1f}s"
328
- else:
329
- delta_str = f"+{delta:.0f}s"
330
-
331
- ts = datetime.now().strftime("%H:%M:%S.%f")[:-3]
332
-
333
- _builtin_print(*args, **kwargs)
334
-
335
- if _log_latest_path or _log_daily_path:
336
- sep = kwargs.get("sep", " ")
337
- end = kwargs.get("end", "\n")
338
- text = sep.join(str(a) for a in args)
339
- prefix = f"[{elapsed_str:>6}] {ts} {delta_str:>8} "
340
- _write_log(prefix + _strip_ansi(text) + end)
341
-
342
- builtins.print = _tprint
343
-
344
- # Ensure project root is on sys.path
345
- _project_root = os.environ.get("KITE_PROJECT") or os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
346
- if _project_root not in sys.path:
347
- sys.path.insert(0, _project_root)
348
-
349
- from extensions.services.watchdog.monitor import HealthMonitor
350
-
351
-
352
- def _read_stdin_kite_message(expected_type: str, timeout: float = 10) -> dict | None:
353
- """Read a single kite message of expected type from stdin with timeout."""
354
- result = [None]
355
-
356
- def _read():
357
- try:
358
- line = sys.stdin.readline().strip()
359
- if line:
360
- msg = json.loads(line)
361
- if isinstance(msg, dict) and msg.get("kite") == expected_type:
362
- result[0] = msg
363
- except Exception:
364
- pass
365
-
366
- t = threading.Thread(target=_read, daemon=True)
367
- t.start()
368
- t.join(timeout=timeout)
369
- return result[0]
370
-
371
-
372
- # Global WS reference for publish_event callback
373
- _ws_global = None
374
- _shutting_down = False
375
- _exit_code = 0 # Exit code for main() to use
376
- _monitor = None
377
- _monitor_task = None
378
-
379
- # RPC request-response infrastructure
380
- _rpc_waiters: dict[str, asyncio.Event] = {} # rpc_id -> Event
381
- _rpc_results: dict[str, dict] = {} # rpc_id -> response dict
382
-
383
-
384
- def _is_auth_failure(e: Exception) -> bool:
385
- """Check if a WebSocket exception indicates authentication failure."""
386
- if hasattr(e, 'rcvd') and e.rcvd is not None:
387
- code = e.rcvd.code if hasattr(e.rcvd, 'code') else 0
388
- return code in (4001, 4003)
389
- return False
390
-
391
-
392
- async def main():
393
- global _ws_global, _shutting_down, _monitor
394
- # Initialize log file paths
395
- global _log_dir, _log_latest_path, _crash_log_path
396
- module_data = os.environ.get("KITE_MODULE_DATA")
397
- if module_data:
398
- _log_dir = os.path.join(module_data, "log")
399
- os.makedirs(_log_dir, exist_ok=True)
400
- suffix = os.environ.get("KITE_INSTANCE_SUFFIX", "")
401
-
402
- _log_latest_path = os.path.join(_log_dir, f"latest{suffix}.log")
403
- try:
404
- with open(_log_latest_path, "w", encoding="utf-8") as f:
405
- pass
406
- except Exception:
407
- _log_latest_path = None
408
-
409
- _crash_log_path = os.path.join(_log_dir, f"crashes{suffix}.jsonl")
410
- try:
411
- with open(_crash_log_path, "w", encoding="utf-8") as f:
412
- pass
413
- except Exception:
414
- _crash_log_path = None
415
-
416
- _resolve_daily_log_path()
417
-
418
- _setup_exception_hooks()
419
-
420
- _t0 = time.monotonic()
421
-
422
- # Read boot_info from stdin (only token)
423
- token = ""
424
- try:
425
- line = sys.stdin.readline().strip()
426
- if line:
427
- boot_info = json.loads(line)
428
- token = boot_info.get("token", "")
429
- except Exception:
430
- pass
431
-
432
- # Read kernel_port: env first (fast path), stdin fallback (parallel start)
433
- kernel_port = int(os.environ.get("KITE_KERNEL_PORT", "0"))
434
- if not kernel_port:
435
- msg = _read_stdin_kite_message("kernel_port", timeout=10)
436
- if msg:
437
- kernel_port = int(msg.get("kernel_port", 0))
438
-
439
- if not token or not kernel_port:
440
- print("[watchdog] ERROR: Missing token or kernel_port")
441
- sys.exit(1)
442
-
443
- print(f"[watchdog] Token received ({len(token)} chars), kernel port: {kernel_port} ({_fmt_elapsed(_t0)})")
444
-
445
- # Create monitor (once, persists across reconnects)
446
- _monitor = HealthMonitor(
447
- own_token=token,
448
- kernel_port=kernel_port,
449
- )
450
-
451
- # Start reconnect loop
452
- await _ws_loop(token, kernel_port, _t0)
453
-
454
-
455
- async def _ws_loop(token: str, kernel_port: int, _t0: float):
456
- """Connect to Kernel with exponential backoff reconnection."""
457
- global _shutting_down, _exit_code
458
- retry_delay = 0.3
459
- max_delay = 5.0
460
- max_retries = 10
461
- attempt = 0
462
- while not _shutting_down:
463
- try:
464
- await _ws_connect(token, kernel_port, _t0)
465
- retry_delay = 0.3
466
- attempt = 0
467
- except asyncio.CancelledError:
468
- return
469
- except Exception as e:
470
- attempt += 1
471
- if _is_auth_failure(e):
472
- print(f"[watchdog] Kernel 认证失败,退出")
473
- _exit_code = 1
474
- _shutting_down = True
475
- return
476
- if attempt >= max_retries:
477
- print(f"[watchdog] 重连失败 {max_retries} 次,退出")
478
- _exit_code = 1
479
- _shutting_down = True
480
- return
481
- _write_crash(type(e), e, e.__traceback__, severity="error", handled=True)
482
- print(f"[watchdog] 连接错误: {e}, {retry_delay:.1f}s 后重试 ({attempt}/{max_retries})")
483
- if attempt == 5:
484
- print(f"\033[33m[watchdog] 提示: 已连续 {attempt} 次无法连接 Kernel (端口 {kernel_port})")
485
- if kernel_port < 1024:
486
- print(f"[watchdog] 端口 {kernel_port} 异常偏低,可能是 Kernel 端口绑定失败或配置错误")
487
- print(f"[watchdog] 请检查: 1) Kernel 进程是否存活 2) kernel/module.md 中 preferred_port 配置是否正确\033[0m")
488
- _ws_global_clear()
489
- if _shutting_down:
490
- return
491
- await asyncio.sleep(retry_delay)
492
- retry_delay = min(retry_delay * 2, max_delay)
493
-
494
-
495
- def _ws_global_clear():
496
- global _ws_global
497
- _ws_global = None
498
-
499
-
500
async def _ws_connect(token: str, kernel_port: int, _t0: float):
    """Run a single WebSocket session against the Kernel.

    Sequence: connect -> subscribe -> register -> publish module.ready ->
    start the monitor loop (if it is not running) -> receive loop until the
    socket closes.

    Args:
        token: Auth token, passed as a query parameter of the WebSocket URL.
        kernel_port: Local TCP port of the Kernel.
        _t0: ``time.monotonic()`` reference used for elapsed-time logging and
            for the reported ``startup_time``.
    """
    global _ws_global, _monitor, _monitor_task

    ws_url = f"ws://127.0.0.1:{kernel_port}/ws?token={token}&id=watchdog"
    # Log a redacted URL: the real one carries the auth token, which must not
    # end up in stdout or in log files.
    print(f"[watchdog] Connecting to Kernel: ws://127.0.0.1:{kernel_port}/ws?token=***&id=watchdog")

    # Events the watchdog listens to; used for both subscribe and register.
    watched_events = [
        "system.ready",
        "module.started",
        "module.stopped",
        "module.exiting",
        "module.ready",
        "module.shutdown",
        "module.offline",
    ]

    async with websockets.connect(ws_url, open_timeout=5, ping_interval=None, close_timeout=10) as ws:
        _ws_global = ws
        print(f"[watchdog] Connected to Kernel ({_fmt_elapsed(_t0)})")

        # Subscribe to events
        await _rpc_call(ws, "event.subscribe", {"events": list(watched_events)})
        print(f"[watchdog] Subscribed to events ({_fmt_elapsed(_t0)})")

        # Register to Kernel Registry via RPC
        await _rpc_call(ws, "registry.register", {
            "module_id": "watchdog",
            "module_type": "service",
            "tools": {
                "rpc": {
                    "module": {
                        "health": {"method": "health", "description": "健康检查"},
                        "status": {"method": "status", "description": "状态查询"}
                    }
                }
            },
            "events_publish": {
                "watchdog": {
                    "module": {
                        "unhealthy": {"description": "模块不健康"},
                        "recovered": {"description": "模块恢复"},
                        "resource_critical": {"description": "资源严重不足"},
                        "resource_warning": {"description": "资源警告"},
                        "resource_recovered": {"description": "资源恢复正常"}
                    },
                    "alert": {"description": "监控告警"}
                }
            },
            "events_subscribe": list(watched_events),
        })
        print(f"[watchdog] Registered to Kernel ({_fmt_elapsed(_t0)})")

        # Rebind the monitor callbacks to this session's socket (reconnect-safe)
        _monitor.publish_event = lambda event: asyncio.create_task(_publish_event(ws, event))
        _monitor.rpc_call = lambda method, params: _rpc_call_with_response(ws, method, params)

        # Publish module.ready (on every reconnect)
        if not _shutting_down:
            startup_time = time.monotonic() - _t0
            await _rpc_call(ws, "event.publish", {
                "event_id": str(uuid.uuid4()),
                "event": "module.ready",
                "data": {
                    "module_id": "watchdog",
                    "graceful_shutdown": True,
                    "startup_time": startup_time,
                },
            })
            print(f"[watchdog] module.ready published ({_fmt_elapsed(_t0)})")

        # Start monitor loop if not already running
        if _monitor_task is None or _monitor_task.done():
            _monitor_task = asyncio.create_task(_monitor.run())

        # Message loop: handle incoming RPC requests, responses and events.
        # CRITICAL (RPC deadlock prevention):
        # - Incoming RPC requests must run via create_task(), never awaited here.
        # - A handler may issue an outbound request through
        #   _rpc_call_with_response(); its response is dispatched by this very
        #   loop, so awaiting the handler here would block until the timeout.
        # - Event notifications and RPC responses are handled inline.
        async for raw in ws:
            try:
                msg = json.loads(raw)
            except (json.JSONDecodeError, TypeError):
                continue

            try:
                has_method = "method" in msg
                has_id = "id" in msg

                if has_method and not has_id:
                    # Event notification
                    await _handle_event_notification(msg, _monitor)
                elif has_method and has_id:
                    # Incoming RPC request: run in background to prevent deadlock
                    asyncio.create_task(_handle_rpc_request(ws, msg, _monitor))
                elif has_id and not has_method:
                    # RPC response: hand it to the waiter, if any
                    msg_id = msg["id"]
                    if msg_id in _rpc_waiters:
                        _rpc_results[msg_id] = msg
                        _rpc_waiters[msg_id].set()
            except Exception as e:
                print(f"[watchdog] 消息处理异常(已忽略): {e}")
613
-
614
-
615
-
616
async def _rpc_call(ws, method: str, params: dict = None):
    """Fire a JSON-RPC 2.0 request over ``ws`` without awaiting any reply.

    A fresh UUID is used as the request id; ``params`` is attached only when
    it is truthy.
    """
    request = {"jsonrpc": "2.0", "id": str(uuid.uuid4()), "method": method}
    payload = json.dumps({**request, "params": params} if params else request)
    await ws.send(payload)
622
-
623
-
624
async def _publish_event(ws, event: dict):
    """Send ``event`` to the Kernel through the ``event.publish`` RPC.

    Missing ``event`` / ``data`` keys default to ``""`` and ``{}``.
    """
    payload = {
        "event_id": str(uuid.uuid4()),
        "event": event.get("event", ""),
        "data": event.get("data", {}),
    }
    await _rpc_call(ws, "event.publish", payload)
631
-
632
-
633
async def _rpc_call_with_response(ws, method: str, params: dict = None, timeout: float = 5):
    """Send a JSON-RPC 2.0 request and wait for its response.

    Args:
        ws: Connection object exposing an awaitable ``send``.
        method: RPC method name.
        params: Optional parameters; attached only when truthy.
        timeout: Seconds to wait for the response.

    Returns:
        The response message stored for this request id, or a JSON-RPC style
        error dict (code -32000) when no response arrives within ``timeout``.
    """
    rpc_id = str(uuid.uuid4())
    msg = {"jsonrpc": "2.0", "id": rpc_id, "method": method}
    if params:
        msg["params"] = params

    evt = asyncio.Event()
    _rpc_waiters[rpc_id] = evt

    try:
        # The send sits inside the try so that a failing send still removes
        # the waiter registered above instead of leaking it.
        await ws.send(json.dumps(msg))
        await asyncio.wait_for(evt.wait(), timeout=timeout)
        return _rpc_results.pop(rpc_id, {})
    except asyncio.TimeoutError:
        return {"error": {"code": -32000, "message": f"RPC timeout: {method}"}}
    finally:
        _rpc_waiters.pop(rpc_id, None)
        _rpc_results.pop(rpc_id, None)
653
-
654
-
655
async def _publish_event(ws, event: dict):
    """Publish an event dict via the ``event.publish`` RPC.

    The event name and payload are read from the ``event`` and ``data`` keys,
    falling back to an empty string and an empty dict respectively.
    """
    event_id = str(uuid.uuid4())
    name = event.get("event", "")
    body = event.get("data", {})
    await _rpc_call(ws, "event.publish", {"event_id": event_id, "event": name, "data": body})
662
-
663
-
664
async def _handle_ping_event(data: dict):
    """Reply to a ``system.ping`` event by publishing ``system.pong``.

    The pong echoes the received ``ping_time`` and adds the local wall-clock
    time as ``pong_time``.
    """
    ping_time = data.get("ping_time")
    pong_time = time.time()
    pong = {"module_id": MODULE_NAME, "ping_time": ping_time, "pong_time": pong_time}
    await _publish_event(_ws_global, {"event": "system.pong", "data": pong})
677
-
678
-
679
async def _handle_event_notification(msg: dict, monitor: HealthMonitor):
    """Dispatch one event notification (a JSON-RPC 2.0 message without id).

    ``system.ping`` is answered directly, a ``module.shutdown`` aimed at the
    watchdog (or broadcast) starts the shutdown sequence, and everything else
    is forwarded to the monitor.
    """
    params = msg.get("params", {})
    event_type = params.get("event", "")
    data = params.get("data", {})

    if event_type == "system.ping":
        await _handle_ping_event(data)
        return

    if event_type == "module.shutdown":
        target = data.get("module_id", "")
        reason = data.get("reason", "")
        # Applies to us when explicitly targeted, when no target is given
        # (broadcast), or when the launcher was lost.
        applies_to_us = target == "watchdog" or not target or reason == "launcher_lost"
        if applies_to_us:
            await _handle_shutdown(monitor)
            return

    # The monitor receives the notification's params, not the whole message.
    await monitor.handle_event(params)
701
-
702
-
703
async def _handle_rpc_request(ws, msg: dict, monitor: HealthMonitor):
    """Answer an incoming RPC request (``health`` / ``status``).

    Unknown methods get a -32601 error; an exception raised while producing
    or sending the result is reported back as a -32603 error.
    """
    rpc_id = msg.get("id", "")
    method = msg.get("method", "")

    if method == "health":
        target = _rpc_health
    elif method == "status":
        target = _rpc_status
    else:
        not_found = {"code": -32601, "message": f"Method not found: {method}"}
        await ws.send(json.dumps({"jsonrpc": "2.0", "id": rpc_id, "error": not_found}))
        return

    try:
        result = await target(monitor)
        await ws.send(json.dumps({"jsonrpc": "2.0", "id": rpc_id, "result": result}))
    except Exception as e:
        internal = {"code": -32603, "message": str(e)}
        await ws.send(json.dumps({"jsonrpc": "2.0", "id": rpc_id, "error": internal}))
728
-
729
-
730
async def _rpc_health(monitor: HealthMonitor) -> dict:
    """Build the reply for the ``health`` RPC.

    Reports a fixed ``"healthy"`` status, the uptime in whole seconds and
    aggregate counters over ``monitor.modules``.
    """
    tracked = monitor.modules
    # Modules currently flagged unhealthy
    unhealthy = len([s for s in tracked.values() if s.state == "unhealthy"])
    # Modules whose resource state is critical
    critical = len([s for s in tracked.values() if s.resource_state == "critical"])
    # Restarts summed over every module
    restarts = sum(s.restarted_count for s in tracked.values())

    details = {
        "monitored_modules": len(tracked),
        "unhealthy_modules": unhealthy,
        "critical_resources": critical,
        "total_restarts": restarts,
    }
    return {
        "status": "healthy",
        "uptime_seconds": round(time.time() - _start_ts),
        "details": details,
    }
749
-
750
-
751
async def _rpc_status(monitor: HealthMonitor) -> dict:
    """Build the reply for the ``status`` RPC from ``monitor.get_status()``."""
    status = monitor.get_status()
    return status
754
-
755
-
756
async def _handle_shutdown(monitor: HealthMonitor):
    """Run the shutdown handshake: ack -> exiting -> cleanup -> ready -> exit."""
    global _shutting_down
    print("[watchdog] Received shutdown request")
    _shutting_down = True

    who = {"module_id": "watchdog"}

    # Step 1: acknowledge the request right away
    await _publish_event(_ws_global, {"event": "module.shutdown.ack", "data": dict(who)})

    # Step 2: announce that cleanup is starting
    exiting = {
        "module_id": "watchdog",
        "type": "passive",
        "reason": "shutdown_requested",
        "restart": "auto",
        "action": "none",
        "timeout": 2.0,
        "restart_delay": 0.0,
    }
    await _publish_event(_ws_global, {"event": "module.exiting", "data": exiting})

    # Step 3: cleanup
    monitor.stop()

    # Step 4: report that cleanup is complete
    await _publish_event(_ws_global, {"event": "module.shutdown.ready", "data": dict(who)})
    print("[watchdog] Shutdown ready, exiting")

    # Step 5: leave the process with the recorded exit code
    sys.exit(_exit_code)
789
-
790
-
791
- if __name__ == "__main__":
792
- asyncio.run(main())
793
-
1
+ """
2
+ Watchdog entry point.
3
+ Connects to Kernel via WebSocket JSON-RPC 2.0, registers, subscribes to events,
4
+ runs health monitor loop, handles incoming RPC requests.
5
+ """
6
+
7
+ import asyncio
8
+ import builtins
9
+ import json
10
+ import os
11
+ import re
12
+ import sys
13
+ import threading
14
+ import time
15
+ import traceback
16
+ import uuid
17
+ import random
18
+ from datetime import datetime, timezone
19
+
20
+ import websockets
21
+
22
+
23
+ # ── Module configuration ──
24
+
25
def _config_error(rel_path: str, reason: str) -> None:
    """Print the standard 'invalid module.md' diagnostic and exit with status 1."""
    print(f"[{rel_path}] ERROR: Invalid module configuration in module.md")
    print(f" Path: {rel_path}/module.md")
    print(f" Reason: {reason}")
    sys.exit(1)


def _load_module_config() -> dict:
    """Load module configuration from the frontmatter of module.md.

    module.md is looked up next to this file. Its YAML frontmatter (the part
    between the leading ``---`` markers) must contain a non-empty ``name``
    made only of letters, digits, ``_`` and ``-``.

    Returns:
        Dict with keys ``name``, ``preferred_port`` (0 when absent or not an
        integer), ``advertise_ip`` ("0.0.0.0" when absent) and
        ``max_connections`` (clamped to 1..10, 1 when absent or invalid).

    Raises:
        SystemExit: With status 1 when module.md is missing, unreadable, has
            no frontmatter, cannot be parsed, or carries a missing, empty or
            non-compliant name.
    """
    this_dir = os.path.dirname(os.path.abspath(__file__))
    module_md = os.path.join(this_dir, "module.md")

    # Error messages show the path relative to the project root when possible.
    project_root = os.environ.get("KITE_PROJECT", "")
    if project_root and this_dir.startswith(project_root):
        rel_path = os.path.relpath(this_dir, project_root)
    else:
        rel_path = this_dir

    # Defaults, overridden by whatever valid values the frontmatter provides.
    result = {
        "name": "",
        "preferred_port": 0,
        "advertise_ip": "0.0.0.0"
    }

    if not os.path.exists(module_md):
        _config_error(rel_path, "File not found")

    try:
        with open(module_md, encoding="utf-8") as f:
            text = f.read()

        # Extract YAML frontmatter (between --- markers)
        m = re.match(r'^---\s*\n(.*?)\n---', text, re.DOTALL)
        if not m:
            _config_error(rel_path, "Missing YAML frontmatter")

        try:
            # Imported lazily so a missing PyYAML yields a clear message.
            import yaml
            fm = yaml.safe_load(m.group(1)) or {}
        except ImportError:
            print(f"[{rel_path}] ERROR: PyYAML not installed, cannot parse module.md")
            sys.exit(1)
        except Exception as e:
            _config_error(rel_path, f"YAML parse error: {e}")

        # 'name' is the only required field.
        if "name" not in fm:
            _config_error(rel_path, "Missing 'name' field")

        raw_name = str(fm["name"]).strip()
        if not raw_name:
            _config_error(rel_path, "Empty module name")

        # A compliant name survives the character filter unchanged.
        sanitized = re.sub(r'[^a-zA-Z0-9_\-]', '', raw_name)
        if sanitized != raw_name:
            invalid_chars = ''.join(sorted({c for c in raw_name if c not in sanitized}))
            _config_error(
                rel_path,
                f"Invalid characters in name '{raw_name}': {repr(invalid_chars)}",
            )

        result["name"] = sanitized

        # Optional fields: an unparsable port silently keeps the default.
        if "preferred_port" in fm:
            try:
                result["preferred_port"] = int(fm["preferred_port"])
            except (ValueError, TypeError):
                pass

        if "advertise_ip" in fm:
            result["advertise_ip"] = str(fm["advertise_ip"])

        # max_connections: upper bound for elastic connections, clamped to 1..10
        try:
            result["max_connections"] = max(1, min(10, int(fm.get("max_connections", 1))))
        except (ValueError, TypeError):
            result["max_connections"] = 1

    except SystemExit:
        raise  # keep the exits above from being swallowed by the handler below
    except Exception as e:
        print(f"[{rel_path}] ERROR: Failed to read module.md: {e}")
        sys.exit(1)

    return result
134
+
135
+ _module_config = _load_module_config()
136
+ MODULE_NAME = _module_config["name"]
137
+
138
+
139
def _fmt_elapsed(t0: float) -> str:
    """Render the time elapsed since ``t0`` (a ``time.monotonic()`` value).

    Under one second -> 'NNNms'; under ten seconds -> 'N.Ns'; otherwise 'NNs'.
    """
    elapsed = time.monotonic() - t0
    if elapsed < 1:
        millis = elapsed * 1000
        return f"{millis:.0f}ms"
    return f"{elapsed:.1f}s" if elapsed < 10 else f"{elapsed:.0f}s"
147
+
148
+
149
+ # ── Safe stdout/stderr: ignore BrokenPipeError after Launcher closes stdio ──
150
+
151
class _SafeWriter:
    """Stream wrapper that swallows OSError (incl. BrokenPipeError) on write/flush.

    Every other attribute is delegated to the wrapped stream.
    """

    def __init__(self, stream):
        self._stream = stream

    def write(self, s):
        """Write ``s`` and return the stream's own return value.

        When the underlying write fails with an OSError the data is dropped
        and ``len(s)`` is returned, so callers see the text as consumed.
        """
        try:
            return self._stream.write(s)
        except OSError:  # BrokenPipeError is a subclass of OSError
            return len(s)

    def flush(self):
        """Flush the wrapped stream, ignoring OSError."""
        try:
            self._stream.flush()
        except OSError:
            pass

    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails. If `_stream` itself
        # is missing (instance created without __init__, e.g. by copy),
        # delegating would recurse forever, so fail fast instead.
        if name == "_stream":
            raise AttributeError(name)
        return getattr(self._stream, name)
170
+
171
+ sys.stdout = _SafeWriter(sys.stdout)
172
+ sys.stderr = _SafeWriter(sys.stderr)
173
+
174
+
175
+ # ── Timestamped print + log file writer ──
176
+
177
+ _builtin_print = builtins.print
178
+ _start_ts = time.monotonic()
179
+ _last_ts = time.monotonic()
180
+ _ANSI_RE = re.compile(r"\033\[[0-9;]*m")
181
+ _log_lock = threading.Lock()
182
+ _log_latest_path = None
183
+ _log_daily_path = None
184
+ _log_daily_date = ""
185
+ _log_dir = None
186
+ _crash_log_path = None
187
+
188
def _strip_ansi(s: str) -> str:
    """Return ``s`` with every match of ``_ANSI_RE`` removed."""
    return re.sub(_ANSI_RE, "", s)
190
+
191
def _resolve_daily_log_path():
    """Point ``_log_daily_path`` at today's log file, creating its month directory.

    Does nothing when no log directory is configured or when the cached path
    already belongs to the current date.
    """
    global _log_daily_path, _log_daily_date
    if not _log_dir:
        return

    today = datetime.now().strftime("%Y-%m-%d")
    cache_is_current = today == _log_daily_date and bool(_log_daily_path)
    if cache_is_current:
        return

    # Daily files live under <log_dir>/YYYY-MM/YYYY-MM-DD.log
    month_dir = os.path.join(_log_dir, today[:7])
    os.makedirs(month_dir, exist_ok=True)
    _log_daily_date = today
    _log_daily_path = os.path.join(month_dir, f"{today}.log")
203
+
204
def _write_log(plain_line: str):
    """Append ``plain_line`` to latest.log and to the daily log (best effort).

    Both writes happen under ``_log_lock``; a failure to open or write either
    file is ignored.
    """
    def _append(path):
        if not path:
            return
        try:
            with open(path, "a", encoding="utf-8") as fh:
                fh.write(plain_line)
        except Exception:
            pass

    with _log_lock:
        _append(_log_latest_path)
        # Refresh the daily path first: the date may have rolled over.
        _resolve_daily_log_path()
        _append(_log_daily_path)
220
+
221
+
222
def _write_crash(exc_type, exc_value, exc_tb, thread_name=None, severity="critical", handled=False):
    """Append one JSON crash record to crashes.jsonl and to the daily crash archive.

    The record carries a UTC timestamp, module and thread names, exception
    type, message and formatted traceback, plus process/platform details.
    When a traceback is available, the innermost frame is added as
    ``context``. File errors are ignored.
    """
    record = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "module": MODULE_NAME,
        "thread": thread_name or threading.current_thread().name,
        "exception_type": exc_type.__name__ if exc_type else "Unknown",
        "exception_message": str(exc_value),
        "traceback": "".join(traceback.format_exception(exc_type, exc_value, exc_tb)),
        "severity": severity,
        "handled": handled,
        "process_id": os.getpid(),
        "platform": sys.platform,
        "runtime_version": f"Python {sys.version.split()[0]}",
    }

    frames = traceback.extract_tb(exc_tb) if exc_tb else None
    if frames:
        innermost = frames[-1]
        record["context"] = {
            "function": innermost.name,
            "file": os.path.basename(innermost.filename),
            "line": innermost.lineno,
        }

    line = json.dumps(record, ensure_ascii=False) + "\n"

    if _crash_log_path:
        try:
            with open(_crash_log_path, "a", encoding="utf-8") as out:
                out.write(line)
        except Exception:
            pass

    if not _log_dir:
        return
    try:
        # Archive layout: <log_dir>/crashes/YYYY-MM/YYYY-MM-DD.jsonl
        today = datetime.now().strftime("%Y-%m-%d")
        archive_dir = os.path.join(_log_dir, "crashes", today[:7])
        os.makedirs(archive_dir, exist_ok=True)
        with open(os.path.join(archive_dir, f"{today}.jsonl"), "a", encoding="utf-8") as out:
            out.write(line)
    except Exception:
        pass
267
+
268
+
269
def _print_crash_summary(exc_type, exc_tb, thread_name=None):
    """Print a one-line crash summary (in red) through the original print.

    The location is ``file:line`` of the innermost traceback frame, or
    ``unknown`` when no frame is available. The crash-log path is printed as
    well when one is configured.
    """
    RED = "\033[91m"
    RESET = "\033[0m"

    frames = traceback.extract_tb(exc_tb) if exc_tb else []
    if frames:
        innermost = frames[-1]
        location = f"{os.path.basename(innermost.filename)}:{innermost.lineno}"
    else:
        location = "unknown"

    prefix = f"[{MODULE_NAME}]"
    if thread_name:
        _builtin_print(f"{prefix} {RED}线程 {thread_name} 崩溃: "
                       f"{exc_type.__name__} in {location}{RESET}")
    else:
        _builtin_print(f"{prefix} {RED}崩溃: {exc_type.__name__} in {location}{RESET}")
    if _crash_log_path:
        _builtin_print(f"{prefix} 崩溃日志: {_crash_log_path}")
292
+
293
def _setup_exception_hooks():
    """Install process-wide hooks that record uncaught exceptions.

    ``sys.excepthook`` is wrapped so that a crash record and a console summary
    are produced before the previous hook runs. When available,
    ``threading.excepthook`` is replaced by a hook doing the same for threads.
    """
    previous_hook = sys.excepthook

    def _excepthook(exc_type, exc_value, exc_tb):
        _write_crash(exc_type, exc_value, exc_tb, severity="critical", handled=False)
        _print_crash_summary(exc_type, exc_tb)
        previous_hook(exc_type, exc_value, exc_tb)

    sys.excepthook = _excepthook

    if not hasattr(threading, "excepthook"):
        return

    def _thread_excepthook(args):
        thread = args.thread
        _write_crash(args.exc_type, args.exc_value, args.exc_traceback,
                     thread_name=thread.name if thread else "unknown",
                     severity="error", handled=False)
        _print_crash_summary(args.exc_type, args.exc_traceback,
                             thread_name=thread.name if thread else None)

    threading.excepthook = _thread_excepthook
313
+
314
def _tprint(*args, **kwargs):
    """Print through the original ``print`` and mirror the line into the log files.

    The log line is prefixed with the time since start, the wall-clock time
    (HH:MM:SS.mmm) and the time since the previous call; ANSI colour codes
    are stripped from the logged text.
    """
    global _last_ts

    def _span(seconds: float) -> str:
        # <1s -> 'NNNms', <100s -> 'N.Ns', otherwise 'NNNs'
        if seconds < 1:
            return f"{seconds * 1000:.0f}ms"
        if seconds < 100:
            return f"{seconds:.1f}s"
        return f"{seconds:.0f}s"

    now = time.monotonic()
    elapsed = now - _start_ts
    delta = now - _last_ts
    _last_ts = now

    elapsed_str = _span(elapsed)
    # Sub-millisecond gaps are shown as blank.
    delta_str = "" if delta < 0.001 else "+" + _span(delta)

    ts = datetime.now().strftime("%H:%M:%S.%f")[:-3]

    _builtin_print(*args, **kwargs)

    if not (_log_latest_path or _log_daily_path):
        return
    sep = kwargs.get("sep", " ")
    end = kwargs.get("end", "\n")
    text = sep.join(str(a) for a in args)
    prefix = f"[{elapsed_str:>6}] {ts} {delta_str:>8} "
    _write_log(prefix + _strip_ansi(text) + end)
348
+
349
+ builtins.print = _tprint
350
+
351
+ # Ensure project root is on sys.path
352
+ _project_root = os.environ.get("KITE_PROJECT") or os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
353
+ if _project_root not in sys.path:
354
+ sys.path.insert(0, _project_root)
355
+
356
+ from extensions.services.watchdog.monitor import HealthMonitor
357
+
358
+
359
+ def _read_stdin_kite_message(expected_type: str, timeout: float = 10) -> dict | None:
360
+ """Read a single kite message of expected type from stdin with timeout."""
361
+ result = [None]
362
+
363
+ def _read():
364
+ try:
365
+ line = sys.stdin.readline().strip()
366
+ if line:
367
+ msg = json.loads(line)
368
+ if isinstance(msg, dict) and msg.get("kite") == expected_type:
369
+ result[0] = msg
370
+ except Exception:
371
+ pass
372
+
373
+ t = threading.Thread(target=_read, daemon=True)
374
+ t.start()
375
+ t.join(timeout=timeout)
376
+ return result[0]
377
+
378
+
379
+ # Global WS reference for publish_event callback
380
+ _ws_global = None
381
+ _shutting_down = False
382
+ _exit_code = 0 # Exit code for main() to use
383
+ _monitor = None
384
+ _monitor_task = None
385
+ _has_registered = False # 是否已完成过注册(用于判断 reason: startup vs recovery)
386
+ _init_start_time = 0.0 # 进程启动时间(用于 startup_time 计算)
387
+
388
+ # 弹性多连接
389
+ _extra_ws: dict = {} # slot → WebSocket(附加连接)
390
+ _extra_ws_tasks: dict = {} # slot → recv loop Task
391
+ _kernel_port = "" # 缓存 kernel_port,供 offer handler 使用
392
+
393
+ # RPC request-response infrastructure
394
+ _pending_rpc: dict[str, asyncio.Future] = {} # rpc_id -> Future
395
+
396
+
397
def _handle_rpc_response(msg: dict):
    """Complete the pending Future registered under the response's ``id``.

    The entry is removed from ``_pending_rpc``; responses with an unknown id
    or an already-finished Future are dropped.
    """
    waiter = _pending_rpc.pop(msg.get("id"), None)
    if waiter is None or waiter.done():
        return
    waiter.set_result(msg)
403
+
404
+
405
def _is_auth_failure(e: Exception) -> bool:
    """Tell whether ``e`` carries a received close code of 4001 or 4003.

    The code is read from ``e.rcvd.code``; an exception without a usable
    ``rcvd`` attribute is not an auth failure.
    """
    received = getattr(e, "rcvd", None)
    if received is None:
        return False
    return getattr(received, "code", 0) in (4001, 4003)
411
+
412
+
413
async def _main_with_monitor(token: str, kernel_port: int, _t0: float):
    """Create the health monitor and run the reconnecting WebSocket loop.

    The monitor is created once and kept across reconnects. It is handed a
    ``request_exit`` callback that records the exit code, flags shutdown and
    closes the current socket so the loop can end.
    """
    global _ws_global, _shutting_down, _monitor, _init_start_time

    # Remember the process start reference for later startup_time reporting.
    _init_start_time = _t0

    _monitor = HealthMonitor(own_token=token, kernel_port=kernel_port)

    def _request_exit(code: int = 0):
        global _shutting_down, _exit_code
        _exit_code = code
        _shutting_down = True
        if not _ws_global:
            return
        # Closing the socket interrupts _ws_loop.
        asyncio.ensure_future(_ws_global.close(code=1000, reason="Monitor requested exit"))

    _monitor.request_exit = _request_exit

    await _ws_loop(token, kernel_port, _t0)
436
+
437
+
438
async def _ws_loop(token: str, kernel_port: int, _t0: float):
    """Keep a Kernel session alive, reconnecting with exponential backoff.

    Each iteration runs one ``_ws_connect`` session. A failure is classified
    by its close code or exception type:

    * codes 4001/4003/4004/1008/4010: fatal, exit code 1, no retry;
    * code 4020 (rate limit): fixed 10 s cooldown, gives up after 5 tries;
    * connection refused/reset (incl. WinError 1225): exits with code 0 after
      3 consecutive failures, without writing a crash record;
    * anything else: crash record + backoff with jitter, exits with code 0
      after 10 consecutive failures.

    The loop ends when ``_shutting_down`` is set or the task is cancelled.

    Args:
        token: Auth token passed through to ``_ws_connect``.
        kernel_port: Kernel port passed through to ``_ws_connect``.
        _t0: Start-time reference passed through to ``_ws_connect``.
    """
    global _shutting_down, _exit_code
    retry_delay = 0.5
    max_delay = 10.0
    attempt = 0
    cooldown_attempts = 0

    while not _shutting_down:
        # Computed fresh on every iteration so that a session which ended
        # cleanly never sleeps for a delay left over from an older failure.
        sleep_time = retry_delay
        try:
            await _ws_connect(token, kernel_port, _t0)
            retry_delay = 0.5
            attempt = 0
            cooldown_attempts = 0
            sleep_time = retry_delay
        except asyncio.CancelledError:
            return
        except Exception as e:
            if _shutting_down:
                return

            code = _get_close_code(e)

            # never: these close codes must not trigger a reconnect
            if code in (4001, 4003, 4004, 1008, 4010):
                print(f"[watchdog] 致命错误 (code {code}),退出")
                _exit_code = 1
                _shutting_down = True
                return

            # cooldown: rate limited by the Kernel
            if code == 4020:
                cooldown_attempts += 1
                if cooldown_attempts >= 5:
                    print(f"[watchdog] 速率限制重试 5 次,退出")
                    _exit_code = 1
                    _shutting_down = True
                    return
                print(f"[watchdog] 速率限制,10.0s 后重试 ({cooldown_attempts}/5)")
                # The session is gone: drop the stale socket reference and
                # release pending RPC waiters before cooling down.
                _ws_global_clear()
                await asyncio.sleep(10.0)
                continue

            # Connection refused/reset: the Kernel may be shutting down, so no
            # crash record is written. On Windows, [WinError 1225] arrives as
            # a plain OSError rather than ConnectionRefusedError.
            _is_conn_refused = isinstance(e, (ConnectionRefusedError, ConnectionResetError))
            if not _is_conn_refused and isinstance(e, OSError) and getattr(e, 'winerror', None) == 1225:
                _is_conn_refused = True
            if _is_conn_refused:
                attempt += 1
                if attempt >= 3:
                    print(f"[watchdog] Kernel 持续不可达 ({attempt} ),正常退出")
                    _exit_code = 0
                    _shutting_down = True
                    return
                jitter = retry_delay * 0.2 * random.random()
                sleep_time = retry_delay + jitter
                print(f"[watchdog] Kernel 连接失败: {type(e).__name__}, {sleep_time:.1f}s 后重试 (attempt {attempt})")
            else:
                # standard: exponential backoff + jitter
                attempt += 1
                jitter = retry_delay * 0.2 * random.random()
                sleep_time = retry_delay + jitter
                _write_crash(type(e), e, e.__traceback__, severity="error", handled=True)
                print(f"[watchdog] 连接错误: {e}, {sleep_time:.1f}s 后重试 (attempt {attempt})")
            if attempt >= 10:
                print(f"[watchdog] 连续 {attempt} 次连接失败,正常退出")
                _exit_code = 0
                _shutting_down = True
                return
            if attempt == 5:
                print(f"\033[33m[watchdog] 提示: 已连续 {attempt} 次无法连接 Kernel (端口 {kernel_port})")
                if kernel_port < 1024:
                    print(f"[watchdog] 端口 {kernel_port} 异常偏低,可能是 Kernel 端口绑定失败或配置错误")
                print(f"[watchdog] 请检查: 1) Kernel 进程是否存活 2) kernel/module.md 中 preferred_port 配置是否正确\033[0m")

        _ws_global_clear()
        if _shutting_down:
            return
        await asyncio.sleep(sleep_time)
        retry_delay = min(retry_delay * 2, max_delay)
517
+
518
+
519
def _get_close_code(e: Exception) -> int:
    """Extract the received close code from a websockets exception.

    Returns ``e.rcvd.code`` when available, otherwise 0.
    """
    received = getattr(e, "rcvd", None)
    return 0 if received is None else getattr(received, "code", 0)
524
+
525
+
526
def _ws_global_clear():
    """Drop the socket reference and release every pending RPC waiter.

    Unfinished Futures in ``_pending_rpc`` are completed with a
    "WebSocket disconnected" error so nothing waits on a dead connection;
    the table is then emptied.
    """
    global _ws_global
    _ws_global = None
    for waiter in list(_pending_rpc.values()):
        if waiter.done():
            continue
        waiter.set_result({"error": {"code": -32000, "message": "WebSocket disconnected"}})
    _pending_rpc.clear()
534
+
535
+
536
async def _ws_connect(token: str, kernel_port: int, _t0: float):
    """Run one WebSocket session: connect -> auth -> receiver until close.

    After a successful auth exchange the receive loop is started, the monitor
    callbacks are bound to this socket and the monitor loop is started if it
    is not running. Subscribe/register/module.ready are not done here; the
    receiver triggers them when ``system.require_init`` arrives.

    Raises:
        Exception: When the auth response contains an ``error`` member.
    """
    global _ws_global, _monitor, _monitor_task, _kernel_port, _has_registered

    _kernel_port = kernel_port
    ws_url = f"ws://127.0.0.1:{kernel_port}/ws?id=watchdog"
    print(f"[watchdog] Connecting to Kernel: {ws_url}")

    async with websockets.connect(ws_url, open_timeout=5, ping_interval=None, close_timeout=10) as ws:
        # The first frame on the wire must be the auth request.
        await ws.send(json.dumps({
            "jsonrpc": "2.0",
            "id": "auth",
            "method": "auth",
            "params": {"token": token}
        }))

        auth_resp = json.loads(await asyncio.wait_for(ws.recv(), timeout=5))
        if "error" in auth_resp:
            raise Exception(f"Auth failed: {auth_resp['error']}")

        _ws_global = ws
        print(f"[watchdog] Connected to Kernel ({_fmt_elapsed(_t0)})")

        # Start receiving before any RPC is issued: responses are dispatched
        # by the receiver, so waiting for one without it would deadlock.
        receiver_task = asyncio.create_task(_ws_receiver(ws, _monitor))

        try:
            def _publish(event):
                return asyncio.create_task(
                    _publish_event(ws, event.get("event", ""), event.get("data", {}))
                )

            # Bind monitor callbacks to this session's socket (reconnect-safe);
            # done before system.require_init can be processed.
            _monitor.publish_event = _publish
            _monitor.rpc_call = lambda method, params: _rpc_call(ws, method, params)
            _monitor.rpc_call_kernel = lambda method, params: _rpc_call(ws, method, params)

            monitor_idle = _monitor_task is None or _monitor_task.done()
            if monitor_idle:
                _monitor_task = asyncio.create_task(_monitor.run())

            # Returns once the receive loop ends, i.e. the connection closed.
            await receiver_task
        finally:
            receiver_task.cancel()
584
+
585
+
586
+
587
async def _ws_receiver(ws, monitor):
    """Receive loop: dispatch RPC responses, event notifications and RPC requests.

    Must be running before subscribe/register are issued, otherwise a
    ``_rpc_call`` waiting for its response would never be woken up.
    A ``system.require_init`` notification is recognised here and starts the
    init sequence as a background task.

    Handlers run as background tasks so the loop is never blocked. The tasks
    are kept in a local set until they finish: the event loop only holds weak
    references to tasks, so an unreferenced task may be garbage-collected
    before it completes.
    """
    background: set = set()

    def _spawn(coro):
        task = asyncio.create_task(coro)
        background.add(task)
        task.add_done_callback(background.discard)

    try:
        async for raw in ws:
            try:
                msg = json.loads(raw)
            except (json.JSONDecodeError, TypeError):
                continue

            try:
                has_method = "method" in msg
                has_id = "id" in msg

                if has_method and not has_id:
                    # system.require_init is a system message that does not go
                    # through the event subscription path.
                    params = msg.get("params", {})
                    event_type = params.get("event", "")
                    if event_type == "system.require_init":
                        _spawn(_do_init(ws))
                        continue
                    _spawn(_handle_event_notification(msg, monitor))
                elif has_method and has_id:
                    _spawn(_handle_rpc_request(ws, msg, monitor))
                elif has_id and not has_method:
                    _handle_rpc_response(msg)
            except Exception as e:
                print(f"[watchdog] 消息处理异常(已忽略): {e}")
    except websockets.exceptions.ConnectionClosed:
        pass  # connection closed by the Kernel: leave the loop normally
618
+
619
+
620
async def _rpc_call(ws, method: str, params: dict = None,
                    wait_response: bool = True, timeout: float = 3.0) -> dict:
    """Send a JSON-RPC 2.0 request; by default wait for its response.

    Args:
        ws: Connection object exposing an awaitable ``send``.
        method: RPC method name.
        params: Optional parameters; attached only when truthy.
        wait_response: When False the request is sent fire-and-forget.
        timeout: Seconds to wait for the response.

    Returns:
        ``{}`` for fire-and-forget calls, otherwise the response message, or
        a JSON-RPC style error dict (code -32000) on timeout.
    """
    rpc_id = str(uuid.uuid4())
    msg = {"jsonrpc": "2.0", "id": rpc_id, "method": method}
    if params:
        msg["params"] = params
    payload = json.dumps(msg)

    if not wait_response:
        await ws.send(payload)
        return {}

    future = asyncio.get_running_loop().create_future()
    _pending_rpc[rpc_id] = future
    try:
        await ws.send(payload)
        return await asyncio.wait_for(future, timeout=timeout)
    except asyncio.TimeoutError:
        return {"error": {"code": -32000, "message": f"RPC timeout: {method} ({timeout}s)"}}
    finally:
        # Always unregister: covers timeout, a failing send and cancellation,
        # so no entry is left behind in _pending_rpc.
        _pending_rpc.pop(rpc_id, None)
638
+
639
+
640
async def _do_init(ws):
    """Initialise the session after ``system.require_init``.

    Steps: subscribe -> register -> publish module.ready. Runs as its own
    task so the receiver is not blocked. The reported reason is "startup" for
    the first registration of the process and "recovery" afterwards. Any
    exception is logged and swallowed.
    """
    global _has_registered

    reason = "recovery" if _has_registered else "startup"

    print(f"[watchdog] Received system.require_init, performing init (reason={reason})")

    # Events the watchdog listens to; sent with both subscribe and register.
    watched_events = [
        "system.ready",
        "module.started",
        "module.stopped",
        "module.exiting",
        "module.ready",
        "module.shutdown",
        "module.offline",
        "module.degraded",
        "module.recovered",
    ]

    try:
        await _rpc_call(ws, "event.subscribe", {"events": list(watched_events)})
        print(f"[watchdog] Subscribed to events (reason={reason})")

        rpc_tools = {
            "health": {"method": "health", "description": "健康检查"},
            "status": {"method": "status", "description": "状态查询"},
            "start_monitor": {"method": "start_monitor", "description": "开启监控(由 Launcher 调用)"},
            "stop_monitor": {"method": "stop_monitor", "description": "关闭监控(由 Launcher 调用)"},
            "health_events": {"method": "health_events", "description": "查询健康事件记录"},
        }
        published = {
            "watchdog": {
                "module": {
                    "unhealthy": {"description": "模块不健康"},
                    "recovered": {"description": "模块恢复"},
                    "resource_critical": {"description": "资源严重不足"},
                    "resource_warning": {"description": "资源警告"},
                    "resource_recovered": {"description": "资源恢复正常"},
                    "degraded_timeout": {"description": "模块超长降级告警"},
                },
                "alert": {"description": "监控告警"}
            }
        }
        await _rpc_call(ws, "registry.register", {
            "module_id": "watchdog",
            "module_type": "service",
            "tools": {"rpc": {"module": rpc_tools}},
            "events_publish": published,
            "events_subscribe": list(watched_events),
        })
        print(f"[watchdog] Registered to Kernel (reason={reason})")

        if not _shutting_down:
            # Time since process start, 0 when no start reference was recorded.
            startup_time = time.monotonic() - _init_start_time if _init_start_time else 0
            await _publish_event(ws, "module.ready", {
                "module_id": "watchdog",
                "graceful_shutdown": True,
                "startup_time": startup_time,
                "reason": reason,
            })
            print(f"[watchdog] module.ready published (reason={reason})")

        _has_registered = True
    except Exception as e:
        print(f"[watchdog] _do_init failed: {e}")
726
+
727
+
728
async def _publish_event(ws, event: str, data: dict = None):
    """Send an ``event.publish`` RPC over *ws* without waiting for a reply.

    Args:
        ws: Connection object handed straight through to ``_rpc_call``.
        event: Name of the event to publish.
        data: Event payload; ``None`` (or any falsy value) is sent as ``{}``.
    """
    payload = {
        "event_id": str(uuid.uuid4()),  # fresh random id for every publish
        "event": event,
        "data": data if data else {},
    }
    await _rpc_call(ws, "event.publish", payload, wait_response=False)
735
+
736
+
737
async def _handle_ping_event(data: dict):
    """Answer a ``system.ping`` event by publishing ``system.pong``.

    The pong echoes the ``ping_time`` found in the ping payload (``None`` when
    absent) together with the local wall-clock time at which it was handled.

    Args:
        data: Payload of the ``system.ping`` event.
    """
    pong = {
        "module_id": MODULE_NAME,
        "ping_time": data.get("ping_time"),
        "pong_time": time.time(),
    }
    await _publish_event(_ws_global, "system.pong", pong)
747
+
748
+
749
async def _handle_event_notification(msg: dict, monitor: HealthMonitor):
    """Dispatch one event notification (JSON-RPC 2.0 notification, method='event').

    Events consumed here and not forwarded to the monitor:
      * ``system.ping`` - answered with ``system.pong``.
      * ``system.connection.offer`` / ``system.connection.release`` - handled
        in background tasks.
      * ``module.shutdown`` addressed to the watchdog, broadcast without a
        ``module_id``, or carrying ``reason == "launcher_lost"`` - triggers
        ``_handle_shutdown``.

    Every other event is passed to ``monitor.handle_event`` as the
    notification's ``params`` object.

    Args:
        msg: Decoded JSON-RPC notification.
        monitor: Health monitor that receives forwarded events.
    """
    # ``.get(key, {})`` only covers a missing key; an explicit JSON null would
    # still produce None and break the ``.get`` calls below.
    params = msg.get("params") or {}
    event_type = params.get("event", "")
    data = params.get("data") or {}

    if event_type == "system.ping":
        await _handle_ping_event(data)
        return

    # Elastic connections: offer/release run in the background so the
    # receive path is not blocked while sockets are opened or closed.
    if event_type == "system.connection.offer":
        asyncio.create_task(_handle_connection_offer(data))
        return
    if event_type == "system.connection.release":
        asyncio.create_task(_handle_connection_release(data))
        return

    if event_type == "module.shutdown":
        target = data.get("module_id", "")
        reason = data.get("reason", "")
        # Targeted shutdown (module_id == "watchdog") and broadcast shutdown
        # (no module_id, or launcher_lost) both stop this module.
        if target == "watchdog" or not target or reason == "launcher_lost":
            await _handle_shutdown(monitor)
            return
        # NOTE(review): shutdown events aimed at other modules fall through to
        # the monitor below - confirm this matches the intended nesting of the
        # original ``return``.

    # Forward to the monitor (the params object of the notification).
    await monitor.handle_event(params)
779
+
780
+
781
async def _handle_rpc_request(ws, msg: dict, monitor: HealthMonitor):
    """Serve one incoming JSON-RPC request and send the reply on *ws*.

    Known methods are ``health``, ``status``, ``start_monitor``,
    ``stop_monitor`` and ``health_events``. An unknown method is answered with
    error code -32601; an exception raised while handling or sending the result
    is answered with error code -32603 and the exception text.

    Args:
        ws: Connection the request arrived on; the reply is sent on it.
        msg: Decoded JSON-RPC request.
        monitor: Health monitor backing the handlers.
    """
    request_id = msg.get("id", "")
    method = msg.get("method", "")
    params = msg.get("params", {})

    # Dispatch table: every entry takes the params object, whether or not the
    # underlying handler needs it.
    dispatch = {
        "health": lambda p: _rpc_health(monitor),
        "status": lambda p: _rpc_status(monitor),
        "start_monitor": lambda p: _rpc_start_monitor(monitor),
        "stop_monitor": lambda p: _rpc_stop_monitor(monitor),
        "health_events": lambda p: _rpc_health_events(monitor, p),
    }
    handler = dispatch.get(method)

    if not handler:
        await ws.send(json.dumps({
            "jsonrpc": "2.0", "id": request_id,
            "error": {"code": -32601, "message": f"Method not found: {method}"},
        }))
        return

    try:
        result = await handler(params)
        await ws.send(json.dumps({"jsonrpc": "2.0", "id": request_id, "result": result}))
    except Exception as e:
        await ws.send(json.dumps({
            "jsonrpc": "2.0", "id": request_id,
            "error": {"code": -32603, "message": str(e)},
        }))
809
+
810
+
811
async def _rpc_health(monitor: HealthMonitor) -> dict:
    """RPC handler for ``watchdog.health``.

    Returns:
        A dict with a fixed ``"healthy"`` status, the uptime in whole seconds
        since ``_start_ts``, and aggregate counters over ``monitor.modules``.
    """
    unhealthy = 0   # modules whose state is "unhealthy"
    critical = 0    # modules whose resource_state is "critical"
    restarts = 0    # sum of restarted_count over all modules
    for status in monitor.modules.values():
        if status.state == "unhealthy":
            unhealthy += 1
        if status.resource_state == "critical":
            critical += 1
        restarts += status.restarted_count

    details = {
        "monitored_modules": len(monitor.modules),
        "unhealthy_modules": unhealthy,
        "critical_resources": critical,
        "total_restarts": restarts,
    }
    return {
        "status": "healthy",
        "uptime_seconds": round(time.time() - _start_ts),
        "details": details,
    }
830
+
831
+
832
async def _rpc_status(monitor: HealthMonitor) -> dict:
    """RPC handler for ``watchdog.status``: return ``monitor.get_status()``."""
    status = monitor.get_status()
    return status
835
+
836
+
837
async def _rpc_start_monitor(monitor: HealthMonitor) -> dict:
    """RPC handler for ``watchdog.start_monitor`` (called by the Launcher to enable monitoring)."""
    outcome = monitor.start_monitor()
    return outcome
840
+
841
+
842
async def _rpc_stop_monitor(monitor: HealthMonitor) -> dict:
    """RPC handler for ``watchdog.stop_monitor`` (called by the Launcher to disable monitoring)."""
    outcome = monitor.stop_monitor()
    return outcome
845
+
846
+
847
async def _rpc_health_events(monitor: HealthMonitor, params: dict) -> dict:
    """RPC handler for ``watchdog.health_events`` - query recorded health events.

    Args:
        monitor: Health monitor holding the event records.
        params: Request params. ``limit`` (default 50) is coerced to ``int``
            and falls back to 50 when it cannot be converted; ``since`` is
            passed through unchanged. Anything that is not a dict (JSON null,
            positional params) is treated as "no params".

    Returns:
        Whatever ``monitor.get_health_events(limit=..., since=...)`` returns.
    """
    if not isinstance(params, dict):
        # JSON-RPC permits omitted/null or positional params; use the defaults
        # rather than failing on ``.get``.
        params = {}

    limit = params.get("limit", 50)
    since = params.get("since")
    if not isinstance(limit, int):
        try:
            limit = int(limit)
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int(float("inf")), reachable via JSON ``Infinity``.
            limit = 50
    return monitor.get_health_events(limit=limit, since=since)
857
+
858
+
859
async def _handle_connection_offer(data):
    """Handle slot tokens offered by the Kernel and open the extra connections.

    ``data["slots"]`` is read as a mapping of slot id to an info object that
    carries a ``token``. A slot is skipped when its token is empty or when the
    slot already has a connection in ``_extra_ws``. Each accepted slot is
    connected in its own background task.

    Malformed entries (non-numeric slot id, info that is not a dict) are
    skipped individually so one bad entry cannot block the remaining slots.
    """
    slots = data.get("slots") or {}
    for slot_key, info in slots.items():
        try:
            slot = int(slot_key)
        except (TypeError, ValueError):
            print(f"[watchdog] Ignoring invalid slot id: {slot_key!r}")
            continue
        if not isinstance(info, dict):
            continue
        token = info.get("token", "")
        if not token or slot in _extra_ws:
            continue
        asyncio.create_task(_connect_slot(slot, token))
868
+
869
+
870
async def _connect_slot(slot, token):
    """Open and authenticate one extra ("slot") connection to the local Kernel.

    On success the socket is stored in ``_extra_ws[slot]`` and a receive-loop
    task in ``_extra_ws_tasks[slot]``. An auth error response closes the
    socket and returns quietly; any other failure is logged.

    Args:
        slot: Slot number used as the registry key.
        token: Token sent in the ``auth`` request.
    """
    ws_url = f"ws://127.0.0.1:{_kernel_port}/ws"
    ws = None
    try:
        ws = await websockets.connect(ws_url, open_timeout=5, ping_interval=None, close_timeout=5)
        auth_req = {"jsonrpc": "2.0", "id": f"auth-slot-{slot}", "method": "auth", "params": {"token": token}}
        await ws.send(json.dumps(auth_req))
        resp = json.loads(await asyncio.wait_for(ws.recv(), timeout=5))
        if "error" in resp:
            await ws.close()
            return
        _extra_ws[slot] = ws
        _extra_ws_tasks[slot] = asyncio.create_task(_slot_recv_loop(slot, ws))
        print(f"[watchdog] Slot {slot} connected")
    except Exception as e:
        print(f"[watchdog] Slot {slot} connect failed: {e}")
        # The socket may already be open when send/recv/auth parsing fails;
        # close it so it is not leaked. A socket that made it into the
        # registry is owned by its receive loop and is left alone.
        if ws is not None and _extra_ws.get(slot) is not ws:
            try:
                await ws.close()
            except Exception:
                pass
886
+
887
+
888
async def _slot_recv_loop(slot, ws):
    """Receive loop for an extra connection; messages are treated like those on the main one.

    Each decoded message is classified by the presence of ``method`` / ``id``:
      * method, no id  -> event notification (handled in a background task)
      * method and id  -> RPC request (handled in a background task, reply on *ws*)
      * id, no method  -> RPC response (handled inline)

    Undecodable frames are skipped. When the loop ends, the slot is removed
    from ``_extra_ws`` / ``_extra_ws_tasks``.

    Args:
        slot: Slot number this connection is registered under.
        ws: The extra connection to read from.
    """
    try:
        async for raw in ws:
            try:
                msg = json.loads(raw)
            except (json.JSONDecodeError, TypeError):
                continue
            try:
                has_method = "method" in msg
                has_id = "id" in msg
                if has_method and not has_id:
                    asyncio.create_task(_handle_event_notification(msg, _monitor))
                elif has_method and has_id:
                    asyncio.create_task(_handle_rpc_request(ws, msg, _monitor))
                elif has_id and not has_method:
                    _handle_rpc_response(msg)
            except Exception as e:
                print(f"[watchdog] Slot {slot} 消息处理异常(已忽略): {e}")
    except Exception as e:
        # Still best-effort (the loop just ends), but leave a trace instead of
        # discarding the reason the connection dropped.
        print(f"[watchdog] Slot {slot} receive loop ended: {e}")
    finally:
        # Only unregister if the registry still points at THIS socket: after a
        # release followed by a new offer, the slot may already belong to a
        # newer connection that must not be evicted.
        if _extra_ws.get(slot) is ws:
            _extra_ws.pop(slot, None)
            _extra_ws_tasks.pop(slot, None)
912
+
913
+
914
async def _handle_connection_release(data):
    """Release the slots the Kernel asks for, closing each connection gracefully.

    For every slot in ``data["slots"]`` the socket and its receive task are
    removed from the registries; the socket is closed with code 1000 and the
    task is cancelled.
    """
    for raw_slot in data.get("slots") or []:
        # The offer handler stores connections under int keys; normalise so a
        # slot id that arrives as a string still matches its entry.
        try:
            slot = int(raw_slot)
        except (TypeError, ValueError):
            continue
        ws = _extra_ws.pop(slot, None)
        task = _extra_ws_tasks.pop(slot, None)
        if ws:
            try:
                await ws.close(code=1000, reason="release")
            except Exception:
                # Best-effort close: the socket may already be gone.
                pass
        if task:
            task.cancel()
926
+
927
+
928
async def _handle_shutdown(monitor: HealthMonitor):
    """Run the shutdown handshake: ack -> exiting -> cleanup -> ready -> close.

    Sets the module-level ``_shutting_down`` flag, publishes the three
    handshake events on the main connection, stops the monitor, tears down all
    extra connections and finally closes the main WebSocket.

    Args:
        monitor: Health monitor to stop during cleanup.
    """
    global _shutting_down
    print("[watchdog] Received shutdown request")
    _shutting_down = True

    # Step 1: acknowledge receipt immediately.
    ack_payload = {"module_id": "watchdog"}
    await _publish_event(_ws_global, "module.shutdown.ack", ack_payload)

    # Step 2: announce that cleanup is starting.
    exiting_payload = {
        "module_id": "watchdog",
        "type": "passive",
        "reason": "shutdown_requested",
        "restart": "auto",
        "action": "none",
        "timeout": 2.0,
        "restart_delay": 0.0,
    }
    await _publish_event(_ws_global, "module.exiting", exiting_payload)

    # Step 3: cleanup - stop the monitor, then drop every extra connection.
    monitor.stop()
    for extra in list(_extra_ws.values()):
        try:
            await extra.close(code=1000, reason="shutdown")
        except Exception:
            pass
    for recv_task in _extra_ws_tasks.values():
        recv_task.cancel()
    _extra_ws.clear()
    _extra_ws_tasks.clear()

    # Step 4: report that cleanup has finished.
    ready_payload = {"module_id": "watchdog"}
    await _publish_event(_ws_global, "module.shutdown.ready", ready_payload)
    print("[watchdog] Shutdown ready, exiting")

    # Give the Kernel a moment to process shutdown.ready so the Close frame
    # does not overtake it.
    await asyncio.sleep(0.1)

    # Step 5: close the main WebSocket gracefully.
    if _ws_global:
        try:
            await _ws_global.close(code=1000, reason="Graceful shutdown")
            print("[watchdog] WebSocket closed")
        except Exception as e:
            print(f"[watchdog] Failed to close WebSocket: {e}")

    # Note: do NOT call sys.exit() in async context;
    # let the event loop complete naturally.
978
+
979
+
980
def _init_logs(log_base_dir: str = None, suffix: str = ""):
    """Initialise the logging system (shared by local and remote mode).

    When *log_base_dir* is given, ``<log_base_dir>/log`` is created and the
    ``latest{suffix}.log`` and ``crashes{suffix}.jsonl`` files are truncated.
    A file that cannot be opened has its module-level path reset to ``None``.
    Exception hooks are installed in every case.

    Args:
        log_base_dir: Directory under which the ``log`` folder is created.
        suffix: Text inserted into the log file names.
    """
    global _log_dir, _log_latest_path, _crash_log_path

    if log_base_dir:
        _log_dir = os.path.join(log_base_dir, "log")
        os.makedirs(_log_dir, exist_ok=True)

        # Opening in "w" mode creates the file or empties an existing one.
        _log_latest_path = os.path.join(_log_dir, f"latest{suffix}.log")
        try:
            open(_log_latest_path, "w", encoding="utf-8").close()
        except Exception:
            _log_latest_path = None

        _crash_log_path = os.path.join(_log_dir, f"crashes{suffix}.jsonl")
        try:
            open(_crash_log_path, "w", encoding="utf-8").close()
        except Exception:
            _crash_log_path = None

        # NOTE(review): nesting reconstructed - this call is assumed to belong
        # to the ``if log_base_dir`` branch; confirm against the original file.
        _resolve_daily_log_path()

    _setup_exception_hooks()
998
+
999
+
1000
def _run_local_mode(kernel_port: int):
    """Local mode: read boot_info from stdin, then connect to the Kernel.

    One JSON line is read from stdin and must be an object containing a
    non-empty ``token``. Any problem with that line (closed stdin, invalid
    JSON, a non-object value, an ``error`` field, a missing token) prints a
    message and exits with status 1.

    The function always terminates the process: status 0 when the Kernel is
    unreachable, 1 on any other exception, otherwise ``_exit_code``.

    Args:
        kernel_port: Port of the local Kernel to connect to.
    """
    module_data = os.environ.get("KITE_MODULE_DATA")
    suffix = os.environ.get("KITE_INSTANCE_SUFFIX", "")
    _init_logs(log_base_dir=module_data, suffix=suffix)

    _t0 = time.monotonic()
    line = sys.stdin.readline().strip()
    if not line:
        print(f"[{MODULE_NAME}] ERROR: stdin closed")
        sys.exit(1)
    try:
        msg = json.loads(line)
    except json.JSONDecodeError as e:
        print(f"[{MODULE_NAME}] ERROR: Invalid JSON: {e}")
        sys.exit(1)
    if not isinstance(msg, dict):
        # Valid JSON that is not an object (e.g. ``[]`` or ``42``) would
        # otherwise fail on the ``in`` / ``.get`` lookups below.
        print(f"[{MODULE_NAME}] ERROR: boot_info is not a JSON object")
        sys.exit(1)
    if "error" in msg:
        print(f"[{MODULE_NAME}] 启动失败: {msg.get('message')}")
        sys.exit(1)
    token = msg.get("token", "")
    if not token:
        print(f"[{MODULE_NAME}] ERROR: No token")
        sys.exit(1)

    print(f"[{MODULE_NAME}] Local mode: port={kernel_port}")
    try:
        asyncio.run(_main_with_monitor(token, kernel_port, _t0))
    except (ConnectionRefusedError, ConnectionResetError):
        # The Kernel is already down: treat as a normal exit.
        print(f"[{MODULE_NAME}] Kernel 不可达,正常退出")
        sys.exit(0)
    except Exception as e:
        _write_crash(type(e), e, e.__traceback__, severity="critical", handled=True)
        _print_crash_summary(type(e), e.__traceback__)
        sys.exit(1)
    sys.exit(_exit_code)
1031
+
1032
+
1033
def _run_remote_mode(gateway_url: str):
    """Remote mode: connect to the Kernel through a Gateway.

    Logs are written under ``~/.kite/remote/<MODULE_NAME>``. The function
    always terminates the process: status 1 when the reconnect loop raises,
    otherwise ``_exit_code``.

    Args:
        gateway_url: WebSocket URL of the Gateway.
    """
    home = os.environ.get("HOME") or os.environ.get("USERPROFILE") or os.path.expanduser("~")
    data_dir = os.path.join(home, ".kite", "remote", MODULE_NAME)
    os.makedirs(data_dir, exist_ok=True)
    _init_logs(log_base_dir=data_dir)

    _t0 = time.monotonic()
    kite_token = _get_kite_token(MODULE_NAME, gateway_url)
    print(f"[{MODULE_NAME}] Remote mode: gateway={gateway_url}")

    try:
        asyncio.run(_remote_ws_loop(gateway_url, kite_token, _t0))
    except Exception as exc:
        _write_crash(type(exc), exc, exc.__traceback__, severity="critical", handled=True)
        _print_crash_summary(type(exc), exc.__traceback__)
        sys.exit(1)
    sys.exit(_exit_code)
1051
+
1052
+
1053
async def _remote_ws_loop(gateway_url: str, kite_token: str, _t0: float):
    """Reconnect loop for remote mode.

    Creates the module-level monitor, wires its ``request_exit`` callback and
    keeps calling ``_remote_connect`` until ``_shutting_down`` is set. A failed
    attempt is recorded as a crash and retried after an exponential backoff
    (0.5 s doubling up to 10 s, plus up to 20 % random jitter); the backoff is
    reset after an attempt that returns normally.

    Args:
        gateway_url: WebSocket URL of the Gateway.
        kite_token: Token used for Gateway authentication.
        _t0: ``time.monotonic()`` timestamp of process start, for log timing.
    """
    global _shutting_down, _exit_code, _monitor
    backoff = 0.5
    backoff_cap = 10.0

    # Remote mode: the monitor gets no own_token / kernel_port.
    _monitor = HealthMonitor(own_token="", kernel_port=0)

    def _request_exit(code: int = 0):
        # Invoked by the monitor: remember the exit code, flag shutdown and
        # close the main connection so the receive loop ends.
        global _shutting_down, _exit_code
        _exit_code = code
        _shutting_down = True
        if _ws_global:
            asyncio.ensure_future(_ws_global.close(code=1000, reason="Monitor requested exit"))

    _monitor.request_exit = _request_exit

    while not _shutting_down:
        try:
            await _remote_connect(gateway_url, kite_token, _t0)
        except asyncio.CancelledError:
            return
        except Exception as e:
            if _shutting_down:
                return
            _write_crash(type(e), e, e.__traceback__, severity="error", handled=True)
            jitter = backoff * 0.2 * random.random()
            wait = backoff + jitter
            print(f"[{MODULE_NAME}] 远程连接错误: {e}, {wait:.1f}s 后重试")
            await asyncio.sleep(wait)
            backoff = min(backoff * 2, backoff_cap)
        else:
            # The connection ran and ended normally: start over with the
            # shortest delay next time.
            backoff = 0.5

        # NOTE(review): nesting reconstructed - these two statements are
        # assumed to run once per loop iteration; confirm against the original.
        _ws_global_clear()
        if _shutting_down:
            return
1086
+
1087
+
1088
async def _remote_connect(gateway_url: str, kite_token: str, _t0: float):
    """Remote mode: connect to the Gateway, authenticate and register.

    Sequence on one connection: receive the challenge, send ``auth.connect``
    with its nonce, check the auth response, start the receiver task,
    subscribe to events, register with the registry, wire the monitor
    callbacks, publish ``module.ready``, start the monitor loop if it is not
    running, then wait until the receiver task ends.

    Auth error codes 4001, 4010 and 4011 clear the cached token and exit the
    process with status 1; any other auth error raises ``Exception``.

    Args:
        gateway_url: WebSocket URL of the Gateway.
        kite_token: Token sent in the ``auth.connect`` request.
        _t0: ``time.monotonic()`` timestamp of process start, for log timing.
    """
    global _ws_global, _monitor_task

    # Events the watchdog listens to; the same list goes into the subscribe
    # call and into the registry manifest.
    watched_events = [
        "system.ready",
        "module.started",
        "module.stopped",
        "module.exiting",
        "module.ready",
        "module.shutdown",
        "module.offline",
        "module.degraded",
        "module.recovered",
    ]

    # Manifest sent to ``registry.register``.
    registration = {
        "module_id": "watchdog",
        "module_type": "service",
        "tools": {
            "rpc": {
                "module": {
                    "health": {"method": "health", "description": "健康检查"},
                    "status": {"method": "status", "description": "状态查询"},
                    "start_monitor": {"method": "start_monitor", "description": "开启监控(由 Launcher 调用)"},
                    "stop_monitor": {"method": "stop_monitor", "description": "关闭监控(由 Launcher 调用)"},
                    "health_events": {"method": "health_events", "description": "查询健康事件记录"},
                }
            }
        },
        "events_publish": {
            "watchdog": {
                "module": {
                    "unhealthy": {"description": "模块不健康"},
                    "recovered": {"description": "模块恢复"},
                    "resource_critical": {"description": "资源严重不足"},
                    "resource_warning": {"description": "资源警告"},
                    "resource_recovered": {"description": "资源恢复正常"},
                    "degraded_timeout": {"description": "模块超长降级告警"},
                },
                "alert": {"description": "监控告警"}
            }
        },
        "events_subscribe": list(watched_events),
    }

    print(f"[{MODULE_NAME}] Connecting to Gateway: {gateway_url}")
    async with websockets.connect(gateway_url, open_timeout=10, ping_interval=None, close_timeout=10) as ws:
        # 1. Receive the challenge and pull out its nonce.
        challenge = json.loads(await asyncio.wait_for(ws.recv(), timeout=10))
        nonce = challenge.get("params", {}).get("nonce", "")

        # 2. Send auth.connect.
        auth_request = {
            "jsonrpc": "2.0", "id": "auth-connect",
            "method": "auth.connect",
            "params": {
                "nonce": nonce,
                "module_id": MODULE_NAME,
                "auth": {"method": "kite_token", "token": kite_token},
                "client": {"type": "module"}
            }
        }
        await ws.send(json.dumps(auth_request))

        # 3. Wait for the authentication response.
        resp = json.loads(await asyncio.wait_for(ws.recv(), timeout=10))
        if "error" in resp:
            error = resp["error"]
            code = error.get("code", 0)
            if code in (4001, 4010, 4011):
                # Token rejected: drop the cached token and stop for good.
                _clear_token_cache(MODULE_NAME, gateway_url)
                print(f"[{MODULE_NAME}] Token 无效,请重新配对")
                sys.exit(1)
            raise Exception(f"Gateway 认证失败: {error}")

        _ws_global = ws
        print(f"[{MODULE_NAME}] Connected via Gateway ({_fmt_elapsed(_t0)})")

        # Start the receive loop BEFORE the RPC calls below, so that calls
        # waiting for a response cannot deadlock.
        receiver_task = asyncio.create_task(_ws_receiver(ws, _monitor))

        try:
            # 4. Subscribe to events.
            await _rpc_call(ws, "event.subscribe", {"events": watched_events})

            # 5. Register.
            await _rpc_call(ws, "registry.register", registration)
            print(f"[{MODULE_NAME}] Registered via Gateway ({_fmt_elapsed(_t0)})")

            # Bind the monitor callbacks to this connection (redone on every
            # reconnect so they never point at a stale socket).
            _monitor.publish_event = lambda event: asyncio.create_task(
                _publish_event(ws, event.get("event", ""), event.get("data", {}))
            )
            _monitor.rpc_call = lambda method, params: _rpc_call(ws, method, params)
            _monitor.rpc_call_kernel = lambda method, params: _rpc_call(ws, method, params)

            # 6. module.ready
            if not _shutting_down:
                await _publish_event(ws, "module.ready", {
                    "module_id": "watchdog",
                    "graceful_shutdown": True,
                    "startup_time": time.monotonic() - _t0,
                })
                print(f"[{MODULE_NAME}] module.ready via Gateway ({_fmt_elapsed(_t0)})")

            # Start the monitor loop unless one is already running.
            if _monitor_task is None or _monitor_task.done():
                _monitor_task = asyncio.create_task(_monitor.run())

            # Wait for the receive loop to finish (it ends when the
            # connection closes).
            await receiver_task
        finally:
            receiver_task.cancel()
1209
+
1210
+
1211
+ # ── Token 管理(远程模式)──
1212
+
1213
def _gateway_to_filename(gateway_url: str) -> str:
    """Derive a token-cache file name (``<host>-<port>.json``) from a gateway URL.

    A missing host becomes ``unknown``; a missing port defaults to 443 for
    ``wss`` and 80 otherwise. ``:`` and ``/`` in the result are replaced by
    ``-``. Any parsing failure yields ``default.json``.
    """
    try:
        from urllib.parse import urlparse

        parts = urlparse(gateway_url)
        host = parts.hostname if parts.hostname else "unknown"
        if parts.port:
            port = parts.port
        elif parts.scheme == "wss":
            port = 443
        else:
            port = 80
        name = f"{host}-{port}.json"
        return name.replace(":", "-").replace("/", "-")
    except Exception:
        return "default.json"
1221
+
1222
def _load_token_cache(module_name: str, gateway_url: str) -> dict | None:
    """Load the cached token file for *module_name* / *gateway_url*.

    The file lives at ``~/.kite/remote/<module_name>/tokens/<gateway file>``.

    Returns:
        The parsed JSON content, or ``None`` when the file is missing or
        cannot be read or parsed.
    """
    home = os.environ.get("HOME") or os.environ.get("USERPROFILE") or os.path.expanduser("~")
    tokens_dir = os.path.join(home, ".kite", "remote", module_name, "tokens")
    token_file = os.path.join(tokens_dir, _gateway_to_filename(gateway_url))
    try:
        # A missing file raises here and lands in the same ``None`` result.
        with open(token_file, "r") as fh:
            return json.load(fh)
    except Exception:
        return None
1229
+
1230
def _clear_token_cache(module_name: str, gateway_url: str):
    """Delete the cached token file for *module_name* / *gateway_url*, ignoring any failure."""
    home = os.environ.get("HOME") or os.environ.get("USERPROFILE") or os.path.expanduser("~")
    tokens_dir = os.path.join(home, ".kite", "remote", module_name, "tokens")
    token_file = os.path.join(tokens_dir, _gateway_to_filename(gateway_url))
    try:
        os.remove(token_file)
    except Exception:
        # Best-effort: a missing or undeletable file is not an error here.
        pass
1235
+
1236
def _get_kite_token(module_name: str, gateway_url: str) -> str:
    """Return the Kite token for remote mode, or exit when none is available.

    Lookup order: the ``KITE_TOKEN`` environment variable, then the ``token``
    field of the cached token file. When neither yields a non-empty token, a
    hint is printed and the process exits with status 1.

    Args:
        module_name: Module whose token cache is consulted.
        gateway_url: Gateway the token belongs to.
    """
    token = os.environ.get("KITE_TOKEN")
    if token:
        return token

    cache = _load_token_cache(module_name, gateway_url)
    # A cache file without a "token" key, or with a non-object payload, is
    # treated like a missing cache instead of raising KeyError/TypeError.
    cached_token = cache.get("token") if isinstance(cache, dict) else None
    if cached_token:
        return cached_token

    print(f"[{module_name}] No token for {gateway_url}")
    print("  export KITE_TOKEN=<token>")
    sys.exit(1)
1244
+
1245
+
1246
if __name__ == "__main__":
    # Mode selection: a Kernel port in the environment means local mode;
    # otherwise a gateway URL (module config first, then the environment)
    # selects remote mode.
    kernel_port = os.environ.get("KITE_KERNEL_PORT")
    if kernel_port:
        _run_local_mode(int(kernel_port))
    else:
        config = _load_module_config()
        gateway_url = config.get("gateway_url") or os.environ.get("KITE_GATEWAY_URL")
        if not gateway_url:
            print(f"[{MODULE_NAME}] ERROR: No KITE_KERNEL_PORT and no gateway_url")
            sys.exit(1)
        else:
            _run_remote_mode(gateway_url)
1258
+