@agentunion/kite 1.5.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/kite/checklists/feature-checklist.md +496 -0
- package/.claude/skills/kite/references/event-patterns.md +180 -0
- package/.claude/skills/kite/references/health-check.md +202 -0
- package/.claude/skills/kite/references/http-service.md +199 -0
- package/.claude/skills/kite/references/module-md-spec.md +172 -0
- package/.claude/skills/kite/references/multi-connection.md +147 -0
- package/.claude/skills/kite/references/rpc-patterns.md +199 -0
- package/.claude/skills/kite/references/shutdown-sequence.md +146 -0
- package/.claude/skills/kite/references/stdin-protocol.md +147 -0
- package/.claude/skills/kite/references/test-center-integration.md +178 -0
- package/.claude/skills/kite/references/ws-lifecycle.md +301 -0
- package/.claude/skills/kite/skill.md +272 -0
- package/.claude/skills/kite/templates/go/README.md +20 -0
- package/.claude/skills/kite/templates/node/entry.js +134 -0
- package/.claude/skills/kite/templates/node/module.md +16 -0
- package/.claude/skills/kite/templates/node/server.js +351 -0
- package/.claude/skills/kite/templates/node/server_http.js +90 -0
- package/.claude/skills/kite/templates/python/entry.py +425 -0
- package/.claude/skills/kite/templates/python/module.md +26 -0
- package/.claude/skills/kite/templates/python/server.py +447 -0
- package/.claude/skills/kite/templates/python/server_http.py +433 -0
- package/cli.js +38 -4
- package/core/env_checker.py +96 -0
- package/docs/05-/347/237/255/344/277/241/350/256/244/350/257/201/344/270/216/347/224/250/346/210/267/344/277/241/346/201/257/346/216/245/345/217/243/346/226/207/346/241/243.md +507 -0
- package/docs/ACP/345/215/217/350/256/256/345/205/274/345/256/271/346/226/271/346/241/210.md +138 -0
- package/docs/CI/344/270/216AI/350/207/252/345/212/250/345/214/226/346/265/213/350/257/225/346/226/271/346/241/210.md +75 -0
- package/docs/CLI/345/274/200/345/217/221/350/256/241/345/210/222.md +595 -0
- package/docs/ClaudeCode/350/277/234/347/250/213/345/215/217/344/275/234/347/263/273/347/273/237-/346/212/200/346/234/257/350/257/204/344/274/260.md +535 -0
- package/docs/ClaudeCode/350/277/234/347/250/213/345/215/217/344/275/234/347/263/273/347/273/237/350/256/276/350/256/241.md +631 -0
- package/docs/Evol-App/344/275/277/347/224/250KernelClient/346/224/271/351/200/240/345/256/214/346/210/220.md +342 -0
- package/docs/Evol/346/216/247/345/210/266/345/217/260/346/217/222/344/273/266/345/214/226/346/236/266/346/236/204/346/246/202/350/246/201.md +604 -0
- package/docs/Evol/346/216/247/345/210/266/345/217/260/346/217/222/344/273/266/345/214/226/346/236/266/346/236/204/350/256/276/350/256/241.md +1708 -0
- package/docs/Evol/346/250/241/345/235/227/350/256/276/350/256/241/346/226/271/346/241/210.md +1154 -0
- package/docs/Evol/351/241/265/351/235/242/346/217/222/344/273/266/345/214/226-Evol/346/250/241/345/235/227/345/256/236/346/226/275/346/214/207/345/215/227.md +403 -0
- package/docs/Evol/351/241/265/351/235/242/346/217/222/344/273/266/345/214/226-/345/244/226/351/203/250/346/250/241/345/235/227/346/216/245/345/205/245/346/214/207/345/215/227.md +468 -0
- package/docs/HTTP-RPC/350/277/201/347/247/273/345/210/260WebSocket/350/256/241/345/210/222.md +318 -0
- package/docs/INDEX.md +388 -0
- package/docs/KITE_DOCS_GUIDE.md +33 -0
- package/docs/Kernel-Client-Kite-Token/346/224/257/346/214/201/345/256/236/346/226/275/345/256/214/346/210/220.md +330 -0
- package/docs/Kernel/344/270/273/345/212/250Ping/346/234/272/345/210/266-/346/255/243/347/241/256/345/256/236/347/216/260.md +235 -0
- package/docs/Kernel/344/270/273/345/212/250Ping/346/234/272/345/210/266/345/256/236/346/226/275/346/200/273/347/273/223.md +204 -0
- package/docs/Kite/345/256/211/350/243/205/351/227/256/351/242/230/350/247/243/345/206/263/346/226/271/346/241/210.md +362 -0
- package/docs/Kite/346/216/247/345/210/266/345/217/260/346/217/222/344/273/266/345/214/226/346/236/266/346/236/204/350/256/276/350/256/241-/347/273/210/346/236/201/347/233/256/346/240/207.md +721 -0
- package/docs/Kite/346/216/247/345/210/266/345/217/260/347/273/237/344/270/200WebSocket/346/224/271/351/200/240/346/226/271/346/241/210.md +821 -0
- package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/01-/346/241/206/346/236/266/345/256/232/344/275/215.md +12 -0
- package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/02-/346/240/270/345/277/203/346/246/202/345/277/265.md +341 -0
- package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/03-/347/263/273/347/273/237/346/236/266/346/236/204.md +257 -0
- package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/04-/346/250/241/345/235/227/350/247/204/350/214/203.md +263 -0
- package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/05-/346/240/270/345/277/203/346/265/201/347/250/213-/346/226/260/347/211/210.md +267 -0
- package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/05-/346/240/270/345/277/203/346/265/201/347/250/213.md +149 -0
- package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/06-/347/233/256/345/275/225/347/273/223/346/236/204.md +231 -0
- package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/07-/346/225/260/346/215/256/346/250/241/345/236/213.md +68 -0
- package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/08-/346/211/251/345/261/225/346/200/247.md +34 -0
- package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/09-/344/270/216/345/205/267/344/275/223/345/272/224/347/224/250/347/232/204/345/205/263/347/263/273.md +22 -0
- package/docs/Kite/346/241/206/346/236/266/350/256/276/350/256/241/README.md +46 -0
- package/docs/Kite/347/263/273/347/273/237/345/220/257/345/212/250/346/265/201/347/250/213.md +567 -0
- package/docs/Launcher/345/220/257/345/212/250/345/231/250/346/226/207/346/241/243.md +745 -0
- package/docs/Polyglot/350/277/220/350/241/214/346/227/266/344/270/216Clawdbot/345/205/274/345/256/271/346/200/247/350/256/276/350/256/241.md +321 -0
- package/docs/Redis/344/270/216/346/250/241/345/235/227/345/244/232/345/256/236/344/276/213/346/226/271/346/241/210.md +438 -0
- package/docs/Relay-Kite-Token/350/256/244/350/257/201/345/256/236/346/226/275/345/256/214/346/210/220.md +178 -0
- package/docs/Relay-Token/346/235/203/351/231/220/351/205/215/347/275/256/351/252/214/350/257/201.md +113 -0
- package/docs/Watchdog/345/201/245/345/272/267/346/243/200/346/237/245/344/270/216WebSocket-Ping/346/234/272/345/210/266/345/210/206/346/236/220.md +367 -0
- package/docs/Watchdog/350/265/204/346/272/220/347/233/221/346/216/247/347/255/226/347/225/245.md +92 -0
- package/docs/WebSocket/346/216/245/346/224/266/345/276/252/347/216/257/346/255/273/351/224/201/351/230/262/350/214/203/350/247/204/350/214/203.md +357 -0
- package/docs/WebSocket/350/277/236/346/216/245/351/237/247/346/200/247/344/270/216/351/207/215/350/277/236/346/234/272/345/210/266/345/256/214/346/225/264/346/226/271/346/241/210.md +531 -0
- package/docs/WebSocket/350/277/236/346/216/245/351/237/247/346/200/247/346/226/271/346/241/210.md +169 -0
- package/docs/WebSocket/351/207/215/350/277/236/346/234/272/345/210/266/346/265/213/350/257/225/346/212/245/345/221/212.md +169 -0
- package/docs/WebSocket/351/207/215/350/277/236/351/200/200/351/201/277/346/234/272/345/210/266/346/226/271/346/241/210.md +394 -0
- package/docs/Web/346/250/241/345/235/227/344/270/216Evol/346/250/241/345/235/227/351/207/215/346/236/204/345/210/206/346/236/220.md +521 -0
- package/docs/audit-api-guide.md +68 -0
- package/docs/audit-module-design.md +315 -0
- package/docs/audit-module-implementation-summary.md +149 -0
- package/docs/llm-context-design.md +52 -0
- package/docs/llm-test-enhancement-plan.md +970 -0
- package/docs/logs-api-guide.md +42 -0
- package/docs/npm/345/214/205Python/347/216/257/345/242/203/347/256/241/347/220/206/346/226/271/346/241/210.md +302 -0
- package/docs/npm/345/217/221/345/270/203/344/270/216CLI/344/275/277/347/224/250/346/214/207/345/215/227.md +245 -0
- package/docs/stdio/344/270/216/347/253/257/345/217/243/345/217/221/347/216/260/351/207/215/346/236/204.md +480 -0
- package/docs/web/346/250/241/345/235/227/344/270/255/350/275/254/346/234/215/345/212/241/350/256/276/350/256/241/346/226/271/346/241/210.md +449 -0
- package/docs//344/272/213/344/273/266/345/244/204/347/220/206/346/234/272/345/210/266.md +388 -0
- package/docs//344/272/213/344/273/266/345/244/204/347/220/206/350/247/204/350/214/203.md +113 -0
- package/docs//344/272/213/344/273/266/350/256/242/351/230/205/351/200/232/351/205/215/347/254/246/350/247/204/350/214/203.md +256 -0
- package/docs//344/272/213/344/273/266/351/230/237/345/210/227/345/274/271/346/200/247/347/256/241/347/220/206.md +449 -0
- package/docs//344/272/244/344/272/222/345/274/217/347/273/210/347/253/257/346/216/247/345/210/266/346/226/271/346/241/210.md +301 -0
- package/docs//344/273/243/347/220/206/345/220/257/345/212/250/345/231/250/344/270/216/345/256/271/345/231/250/345/214/226.md +140 -0
- package/docs//344/273/243/347/240/201/347/273/237/350/256/241/345/267/245/345/205/267/344/275/277/347/224/250/350/257/264/346/230/216.md +217 -0
- package/docs//344/274/230/351/233/205/351/200/200/345/207/272/350/247/204/350/214/203.md +362 -0
- package/docs//344/276/235/350/265/226/347/256/241/347/220/206/350/257/264/346/230/216.md +141 -0
- package/docs//344/277/256/345/244/215/346/235/203/351/231/220/351/227/256/351/242/230-evol-RPC/346/235/203/351/231/220.md +268 -0
- package/docs//345/210/240/351/231/244kernel-client-example/345/256/214/346/210/220.md +309 -0
- package/docs//345/210/240/351/231/244ws-management/345/256/214/346/210/220.md +418 -0
- package/docs//345/220/257/345/212/250/344/274/230/345/214/226/346/226/271/346/241/210.md +522 -0
- package/docs//345/220/257/345/212/250/344/276/235/350/265/226/344/270/216/346/216/222/345/272/217.md +105 -0
- package/docs//345/256/211/350/243/205/350/204/232/346/234/254/345/274/200/345/217/221/346/226/207/346/241/243.md +643 -0
- package/docs//345/256/214/346/225/264/345/220/257/345/212/250/346/265/201/347/250/213/350/256/276/350/256/241.md +452 -0
- package/docs//345/256/236/347/216/260/350/247/204/345/210/222.md +195 -0
- package/docs//345/277/203/350/267/263/346/234/272/345/210/266/351/207/215/346/236/204/346/200/273/347/273/223.md +166 -0
- package/docs//346/217/241/346/211/213/350/256/244/350/257/201/346/226/271/346/241/210-/345/256/211/345/205/250/345/256/241/346/237/245.md +176 -0
- package/docs//346/217/241/346/211/213/350/256/244/350/257/201/346/226/271/346/241/210.md +908 -0
- package/docs//346/226/207/346/241/243/346/233/264/346/226/260/346/270/205/345/215/225.md +83 -0
- package/docs//346/227/245/345/277/227/344/270/216/345/274/202/345/270/270/345/244/204/347/220/206/350/247/204/350/214/203.md +829 -0
- package/docs//346/227/245/345/277/227/350/260/203/350/257/225/345/256/236/346/210/230/346/214/207/345/215/227.md +25 -0
- package/docs//346/236/266/346/236/204/345/200/237/351/211/264/346/214/207/345/215/227.md +977 -0
- package/docs//346/236/266/346/236/204/346/224/271/351/200/240-/345/256/214/346/210/220/346/200/273/347/273/223.md +440 -0
- package/docs//346/236/266/346/236/204/347/216/260/347/212/266/344/270/216/347/273/210/346/236/201/347/233/256/346/240/207/345/257/271/346/257/224/345/210/206/346/236/220.md +508 -0
- package/docs//346/250/241/345/235/227/345/244/232/350/277/236/346/216/245/346/216/247/345/210/266/347/255/226/347/225/245.md +220 -0
- package/docs//346/250/241/345/235/227/345/256/211/350/243/205/346/234/272/345/210/266/350/256/276/350/256/241.md +500 -0
- package/docs//346/250/241/345/235/227/345/274/200/345/217/221/346/214/207/345/215/227.md +1824 -0
- package/docs//346/250/241/345/235/227/347/203/255/346/233/264/346/226/260.md +89 -0
- package/docs//346/250/241/345/235/227/350/277/234/347/250/213/351/203/250/347/275/262/345/274/200/345/217/221/350/247/204/350/214/203.md +460 -0
- package/docs//346/250/241/345/235/227/351/200/200/345/207/272/346/234/272/345/210/266/345/256/214/346/225/264/346/226/271/346/241/210.md +303 -0
- package/docs//346/250/241/345/235/227/351/205/215/347/275/256/345/212/240/350/275/275/344/270/216/347/203/255/351/207/215/350/275/275/350/247/204/350/214/203.md +369 -0
- package/docs//346/265/213/350/257/225/344/270/255/345/277/203/346/267/273/345/212/240/346/250/241/345/235/227/346/265/213/350/257/225/346/214/207/345/215/227.md +147 -0
- package/docs//347/211/210/346/234/254/351/224/201/345/256/232/347/216/257/345/242/203/347/256/241/347/220/206/346/226/271/346/241/210.md +331 -0
- package/docs//347/216/257/345/242/203/345/217/230/351/207/217/344/270/216/350/277/220/350/241/214/346/227/266/347/233/256/345/275/225/350/256/276/350/256/241.md +499 -0
- package/docs//347/216/257/345/242/203/347/256/241/347/220/206/345/256/214/346/225/264/346/226/271/346/241/210.md +334 -0
- package/docs//350/231/232/346/213/237/346/250/241/345/235/227/344/270/255/350/275/254/346/234/215/345/212/241/345/256/214/346/225/264/350/256/276/350/256/241.md +1496 -0
- package/docs//350/231/232/346/213/237/347/216/257/345/242/203/345/267/245/344/275/234/345/216/237/347/220/206.md +163 -0
- package/docs//350/256/241/345/210/222/347/256/241/347/220/206/345/231/250/344/275/277/347/224/250/346/214/207/345/215/227.md +196 -0
- package/docs//350/256/244/350/257/201/346/250/241/345/235/227/344/270/216Gateway/350/256/276/350/256/241/346/226/271/346/241/210.md +765 -0
- package/docs//350/277/234/347/250/213/346/250/241/345/235/227/350/256/276/350/256/241-/346/227/247/347/211/210.md +1117 -0
- package/docs//350/277/234/347/250/213/346/250/241/345/235/227/350/256/276/350/256/241.md +451 -0
- package/docs//351/207/215/346/236/204/346/234/272/345/210/266/346/270/205/345/215/225.md +192 -0
- package/docs//351/223/276/350/267/257/350/277/275/350/270/252/346/226/271/346/241/210.md +242 -0
- package/docs//351/231/215/347/272/247/347/255/226/347/225/245/350/256/276/350/256/241/346/226/271/346/241/210.md +618 -0
- package/extensions/agents/assistant/entry.py +113 -14
- package/extensions/agents/assistant/module.md +27 -22
- package/extensions/agents/assistant/server.py +291 -105
- package/extensions/channels/acp_channel/entry.py +114 -16
- package/extensions/channels/acp_channel/module.md +4 -0
- package/extensions/channels/acp_channel/server.py +396 -105
- package/extensions/channels/phone_channel/__init__.py +1 -0
- package/extensions/channels/phone_channel/entry.py +503 -0
- package/extensions/channels/phone_channel/module.md +31 -0
- package/extensions/channels/phone_channel/server.py +686 -0
- package/extensions/event_hub_bench/entry.py +55 -12
- package/extensions/event_hub_bench/module.md +27 -27
- package/extensions/services/audit/README.md +134 -0
- package/extensions/services/audit/collector.py +73 -0
- package/extensions/services/audit/entry.py +444 -0
- package/extensions/services/audit/module.md +66 -0
- package/extensions/services/audit/query_audit.py +111 -0
- package/extensions/services/audit/routes/__init__.py +1 -0
- package/extensions/services/audit/routes/routes_audit.py +113 -0
- package/extensions/services/audit/schemas/__init__.py +5 -0
- package/extensions/services/audit/schemas/audit_event.py +92 -0
- package/extensions/services/audit/server.py +542 -0
- package/extensions/services/audit/storage.py +95 -0
- package/extensions/services/auth/entry.py +1054 -0
- package/extensions/services/auth/module.md +31 -0
- package/extensions/services/auth/token_store.py +185 -0
- package/extensions/services/auth/verifiers/evol_account.py +101 -0
- package/extensions/services/auth/verifiers/kite_token.py +38 -0
- package/extensions/services/auth/verifiers/pairing_code.py +71 -0
- package/extensions/services/backup/entry.py +494 -197
- package/extensions/services/backup/module.md +4 -2
- package/extensions/services/dataclaw/api/__init__.py +0 -0
- package/extensions/services/dataclaw/api/admin.py +367 -0
- package/extensions/services/dataclaw/api/copyright.py +175 -0
- package/extensions/services/dataclaw/api/credits.py +177 -0
- package/extensions/services/dataclaw/api/data.py +179 -0
- package/extensions/services/dataclaw/api/demands.py +269 -0
- package/extensions/services/dataclaw/api/feeds.py +262 -0
- package/extensions/services/dataclaw/api/identity.py +505 -0
- package/extensions/services/dataclaw/api/notifications.py +104 -0
- package/extensions/services/dataclaw/api/reviews.py +138 -0
- package/extensions/services/dataclaw/api/search.py +153 -0
- package/extensions/services/dataclaw/api/subscriptions.py +157 -0
- package/extensions/services/dataclaw/config.json5 +96 -0
- package/extensions/services/dataclaw/core/__init__.py +0 -0
- package/extensions/services/dataclaw/core/auth.py +95 -0
- package/extensions/services/dataclaw/core/config.py +50 -0
- package/extensions/services/dataclaw/core/database.py +70 -0
- package/extensions/services/dataclaw/entry.py +416 -0
- package/extensions/services/dataclaw/gofeed/351/241/271/347/233/256/346/211/200/346/234/211/346/235/203/350/275/254/347/247/273/346/265/201/347/250/213/350/257/264/346/230/216.md +309 -0
- package/extensions/services/dataclaw/migrate.py +283 -0
- package/extensions/services/dataclaw/models/__init__.py +0 -0
- package/extensions/services/dataclaw/module.md +49 -0
- package/extensions/services/dataclaw/requirements.txt +18 -0
- package/extensions/services/dataclaw/server.py +759 -0
- package/extensions/services/dataclaw/services/__init__.py +0 -0
- package/extensions/services/dataclaw/services/agent_service.py +132 -0
- package/extensions/services/dataclaw/services/credit_service.py +235 -0
- package/extensions/services/dataclaw/services/email_service.py +140 -0
- package/extensions/services/dataclaw/services/feed_service.py +259 -0
- package/extensions/services/dataclaw/services/notification_service.py +209 -0
- package/extensions/services/dataclaw/services/oauth_service.py +275 -0
- package/extensions/services/dataclaw/services/pricing.py +102 -0
- package/extensions/services/dataclaw/services/quality.py +79 -0
- package/extensions/services/dataclaw/services/reputation.py +142 -0
- package/extensions/services/dataclaw/services/sms_service.py +174 -0
- package/extensions/services/dataclaw/static/css/common.css +853 -0
- package/extensions/services/dataclaw/static/css/themes/blue.css +42 -0
- package/extensions/services/dataclaw/static/css/themes/dark.css +42 -0
- package/extensions/services/dataclaw/static/css/themes/light.css +35 -0
- package/extensions/services/dataclaw/static/js/api.js +103 -0
- package/extensions/services/dataclaw/static/js/common.js +321 -0
- package/extensions/services/dataclaw/static/js/i18n.js +95 -0
- package/extensions/services/dataclaw/static/js/pages/admin.js +152 -0
- package/extensions/services/dataclaw/static/js/pages/dashboard.js +82 -0
- package/extensions/services/dataclaw/static/js/pages/feed-detail.js +180 -0
- package/extensions/services/dataclaw/static/js/pages/feed-manage.js +158 -0
- package/extensions/services/dataclaw/static/js/theme.js +46 -0
- package/extensions/services/dataclaw/static/locales/en-US.json +464 -0
- package/extensions/services/dataclaw/static/locales/ja-JP.json +464 -0
- package/extensions/services/dataclaw/static/locales/zh-CN.json +464 -0
- package/extensions/services/dataclaw/templates/admin/index.html +90 -0
- package/extensions/services/dataclaw/templates/base.html +136 -0
- package/extensions/services/dataclaw/templates/credits/balance.html +106 -0
- package/extensions/services/dataclaw/templates/credits/deposit.html +164 -0
- package/extensions/services/dataclaw/templates/credits/history.html +90 -0
- package/extensions/services/dataclaw/templates/dashboard.html +52 -0
- package/extensions/services/dataclaw/templates/demands/create.html +78 -0
- package/extensions/services/dataclaw/templates/demands/detail.html +136 -0
- package/extensions/services/dataclaw/templates/demands/list.html +94 -0
- package/extensions/services/dataclaw/templates/feeds/create.html +95 -0
- package/extensions/services/dataclaw/templates/feeds/detail.html +110 -0
- package/extensions/services/dataclaw/templates/feeds/list.html +110 -0
- package/extensions/services/dataclaw/templates/feeds/manage.html +88 -0
- package/extensions/services/dataclaw/templates/index.html +185 -0
- package/extensions/services/dataclaw/templates/login.html +246 -0
- package/extensions/services/dataclaw/templates/register.html +164 -0
- package/extensions/services/dataclaw/templates/settings/notifications.html +96 -0
- package/extensions/services/dataclaw/templates/settings/profile.html +167 -0
- package/extensions/services/dataclaw/templates/subscriptions/list.html +64 -0
- package/extensions/services/dataclaw/tests/__init__.py +0 -0
- package/extensions/services/dataclaw/tests/conftest.py +68 -0
- package/extensions/services/dataclaw/tests/integration/__init__.py +0 -0
- package/extensions/services/dataclaw/tests/integration/test_workflows.py +239 -0
- package/extensions/services/dataclaw/tests/unit/__init__.py +0 -0
- package/extensions/services/dataclaw/tests/unit/test_admin.py +70 -0
- package/extensions/services/dataclaw/tests/unit/test_copyright.py +63 -0
- package/extensions/services/dataclaw/tests/unit/test_credits.py +80 -0
- package/extensions/services/dataclaw/tests/unit/test_data.py +98 -0
- package/extensions/services/dataclaw/tests/unit/test_demands.py +106 -0
- package/extensions/services/dataclaw/tests/unit/test_feeds.py +98 -0
- package/extensions/services/dataclaw/tests/unit/test_identity.py +88 -0
- package/extensions/services/dataclaw/tests/unit/test_notifications.py +36 -0
- package/extensions/services/dataclaw/tests/unit/test_reviews.py +68 -0
- package/extensions/services/dataclaw/tests/unit/test_search.py +64 -0
- package/extensions/services/dataclaw/tests/unit/test_subscriptions.py +65 -0
- package/extensions/services/dataclaw/tests/unit/test_system.py +106 -0
- package/extensions/services/dataclaw/utils/__init__.py +0 -0
- package/extensions/services/dataclaw/utils/crypto.py +38 -0
- package/extensions/services/dataclaw/utils/id_generator.py +52 -0
- package/extensions/services/dataclaw/ws/__init__.py +0 -0
- package/extensions/services/dataclaw/ws/handler.py +163 -0
- package/extensions/services/dataclaw//345/215/217/350/256/2561-/351/241/271/347/233/256/346/235/241/344/273/266/346/216/210/346/235/203/344/270/216/350/202/241/346/235/203/345/257/271/344/273/267/345/215/217/350/256/256.md +243 -0
- package/extensions/services/dataclaw//345/215/217/350/256/2562-/351/241/271/347/233/256/350/264/255/344/271/260/346/235/203/344/270/216/345/244/226/345/214/205/345/247/224/346/211/230/345/274/200/345/217/221/345/215/217/350/256/256.md +434 -0
- package/extensions/services/evol/__init__.py +1 -0
- package/extensions/services/evol/async_http.py +551 -0
- package/extensions/services/evol/auth_manager.py +602 -443
- package/extensions/services/evol/config.json5 +16 -0
- package/extensions/services/evol/entry.py +568 -406
- package/extensions/services/evol/evol_api.py +969 -173
- package/extensions/services/evol/mfa_totp.py +77 -0
- package/extensions/services/evol/module.md +150 -32
- package/extensions/services/evol/nonce_pool.py +113 -0
- package/extensions/services/evol/oauth_manager.py +223 -0
- package/extensions/services/evol/pairing.py +3 -2
- package/extensions/services/evol/pairing_codes.jsonl +1 -0
- package/extensions/services/evol/relay.py +1031 -682
- package/extensions/services/evol/relay_config.json5 +85 -67
- package/extensions/services/evol/routes/routes_llm.py +231 -0
- package/extensions/services/evol/routes/routes_rpc.py +90 -89
- package/extensions/services/evol/routes/routes_test.py +11 -4
- package/extensions/services/evol/server.py +2426 -875
- package/extensions/services/evol/static/assets/CommissionView-Cs_ys6Gm.js +1 -0
- package/extensions/services/evol/static/assets/CommissionView-DACet_Oo.css +1 -0
- package/extensions/services/evol/static/assets/IframePage-DbO11U9G.js +1 -0
- package/extensions/services/evol/static/assets/IframePage-c572lT8i.css +1 -0
- package/extensions/services/evol/static/assets/TeamDetailView-DULrGD7k.css +1 -0
- package/extensions/services/evol/static/assets/TeamDetailView-gy_MBEqG.js +139 -0
- package/extensions/services/evol/static/assets/element-plus-Bd7pZkkM.js +63 -0
- package/extensions/services/evol/static/assets/index-CmMONKzG.css +1 -0
- package/extensions/services/evol/static/assets/index-D44bBe__.js +2 -0
- package/extensions/services/evol/static/assets/vue-vendor-DtF-__I4.js +29 -0
- package/extensions/services/evol/static/index.html +16 -781
- package/extensions/services/evol/static/logo.png +0 -0
- package/extensions/services/evol/stats_manager.py +243 -240
- package/extensions/services/evol/web/README.md +89 -0
- package/extensions/services/evol/web/build.bat +44 -0
- package/extensions/services/evol/web/index.html +13 -0
- package/extensions/services/evol/web/package-lock.json +1718 -0
- package/extensions/services/evol/web/package.json +26 -0
- package/extensions/services/evol/web/public/logo.png +0 -0
- package/extensions/services/evol/web/src/App.vue +7 -0
- package/extensions/services/evol/web/src/components/layout/AppHeader.vue +202 -0
- package/extensions/services/evol/web/src/components/layout/AppLayout.vue +61 -0
- package/extensions/services/evol/web/src/components/layout/AppSidebar.vue +115 -0
- package/extensions/services/evol/web/src/components/login/LoginPage.vue +271 -0
- package/extensions/services/evol/web/src/components/team/AddMemberModal.vue +181 -0
- package/extensions/services/evol/web/src/components/team/GroupTreeNode.vue +156 -0
- package/extensions/services/evol/web/src/components/team/TeamAlertConfig.vue +221 -0
- package/extensions/services/evol/web/src/components/team/TeamBillModal.vue +165 -0
- package/extensions/services/evol/web/src/components/team/TeamMembersAndGroups.vue +499 -0
- package/extensions/services/evol/web/src/components/team/TeamStatsPanel.vue +907 -0
- package/extensions/services/evol/web/src/components/team/TreeNode.vue +331 -0
- package/extensions/services/evol/web/src/components/team/stats/StatsExportProgress.vue +44 -0
- package/extensions/services/evol/web/src/components/team/stats/StatsHeader.vue +89 -0
- package/extensions/services/evol/web/src/components/team/stats/StatsMemberDetail.vue +415 -0
- package/extensions/services/evol/web/src/components/team/stats/StatsSummary.vue +42 -0
- package/extensions/services/evol/web/src/components/team/stats/helpers.ts +195 -0
- package/extensions/services/evol/web/src/components/team/stats/stats.css +741 -0
- package/extensions/services/evol/web/src/components/team/stats/useStatsApi.ts +114 -0
- package/extensions/services/evol/web/src/components/team/stats/useStatsCharts.ts +242 -0
- package/extensions/services/evol/web/src/components/team/stats/useStatsExport.ts +232 -0
- package/extensions/services/evol/web/src/composables/useFormatters.ts +42 -0
- package/extensions/services/evol/web/src/composables/useTheme.ts +52 -0
- package/extensions/services/evol/web/src/env.d.ts +7 -0
- package/extensions/services/evol/web/src/i18n/en.ts +361 -0
- package/extensions/services/evol/web/src/i18n/index.ts +36 -0
- package/extensions/services/evol/web/src/i18n/zh.ts +379 -0
- package/extensions/services/evol/web/src/main.ts +21 -0
- package/extensions/services/evol/web/src/router/index.ts +81 -0
- package/extensions/services/evol/web/src/services/kernel-client.ts +406 -0
- package/extensions/services/evol/web/src/stores/auth.ts +189 -0
- package/extensions/services/evol/web/src/stores/connection.ts +134 -0
- package/extensions/services/evol/web/src/stores/pages.ts +79 -0
- package/extensions/services/evol/web/src/styles/base.css +213 -0
- package/extensions/services/evol/web/src/styles/variables.css +138 -0
- package/extensions/services/evol/web/src/types/rpc.ts +35 -0
- package/extensions/services/evol/web/src/types/token.ts +87 -0
- package/extensions/services/evol/web/src/views/AccountView.vue +1532 -0
- package/extensions/services/evol/web/src/views/AiServiceView.vue +219 -0
- package/extensions/services/evol/web/src/views/CommissionView.vue +1220 -0
- package/extensions/services/evol/web/src/views/CreditsView.vue +131 -0
- package/extensions/services/evol/web/src/views/EndpointView.vue +163 -0
- package/extensions/services/evol/web/src/views/IframePage.vue +120 -0
- package/extensions/services/evol/web/src/views/TeamDetailView.vue +473 -0
- package/extensions/services/evol/web/src/views/TeamView.vue +332 -0
- package/extensions/services/evol/web/tsconfig.json +31 -0
- package/extensions/services/evol/web/tsconfig.node.json +10 -0
- package/extensions/services/evol/web/vite.config.ts +49 -0
- package/extensions/services/evolmem/__init__.py +0 -0
- package/extensions/services/evolmem/entry.py +387 -0
- package/extensions/services/evolmem/hooks/__init__.py +0 -0
- package/extensions/services/evolmem/hooks/assistant_stop.py +228 -0
- package/extensions/services/evolmem/hooks/common.py +76 -0
- package/extensions/services/evolmem/hooks/pre_tool_use.py +56 -0
- package/extensions/services/evolmem/hooks/session_end.py +133 -0
- package/extensions/services/evolmem/hooks/session_start.py +229 -0
- package/extensions/services/evolmem/hooks/user_prompt.py +122 -0
- package/extensions/services/evolmem/module.md +48 -0
- package/extensions/services/evolmem/prompts/00-server-info.md +28 -0
- package/extensions/services/evolmem/prompts/01-behavior.md +46 -0
- package/extensions/services/evolmem/prompts/02-summary-format.md +112 -0
- package/extensions/services/evolmem/prompts/03-file-query.md +92 -0
- package/extensions/services/evolmem/prompts/04-topic-stats.md +11 -0
- package/extensions/services/evolmem/prompts/05-recent-topics.md +84 -0
- package/extensions/services/evolmem/scripts/__init__.py +0 -0
- package/extensions/services/evolmem/scripts/extract_keywords.py +40 -0
- package/extensions/services/evolmem/scripts/search_topics.py +91 -0
- package/extensions/services/evolmem/server.py +641 -0
- package/extensions/services/gateway/entry.py +964 -0
- package/extensions/services/gateway/module.md +29 -0
- package/extensions/services/gateway/nonce_pool.py +65 -0
- package/extensions/services/gateway/relay.py +133 -0
- package/extensions/services/gateway/ws_server.py +285 -0
- package/extensions/services/kite_console/auth_manager.py +603 -0
- package/extensions/services/kite_console/config.json5 +19 -0
- package/extensions/services/kite_console/config_loader.py +117 -0
- package/extensions/services/kite_console/entry.py +528 -0
- package/extensions/services/kite_console/evol_api.py +179 -0
- package/extensions/services/kite_console/evol_config.json5 +29 -0
- package/extensions/services/kite_console/mfa_totp.py +77 -0
- package/extensions/services/kite_console/migrate_tokens.py +122 -0
- package/extensions/services/kite_console/module.md +37 -0
- package/extensions/services/kite_console/nonce_pool.py +113 -0
- package/extensions/services/kite_console/oauth_manager.py +223 -0
- package/extensions/services/kite_console/pairing.py +280 -0
- package/extensions/services/kite_console/pairing_codes.jsonl +2 -0
- package/extensions/services/kite_console/relay.py +1350 -0
- package/extensions/services/kite_console/relay_config.json5 +96 -0
- package/extensions/services/kite_console/routes/__init__.py +1 -0
- package/extensions/services/kite_console/routes/routes_llm.py +231 -0
- package/extensions/services/kite_console/routes/routes_proxy.py +115 -0
- package/extensions/services/kite_console/routes/routes_rpc.py +89 -0
- package/extensions/services/kite_console/routes/routes_test.py +68 -0
- package/extensions/services/kite_console/server.py +1742 -0
- package/extensions/services/{evol → kite_console}/static/css/style.css +656 -2
- package/extensions/services/kite_console/static/index.html +1524 -0
- package/extensions/services/{evol → kite_console}/static/js/dialog.js +11 -4
- package/extensions/services/kite_console/static/js/evol-app.js +7740 -0
- package/extensions/services/{evol/static/js/evol-app.js → kite_console/static/js/evol-app.js.backup} +2777 -1949
- package/extensions/services/kite_console/static/js/kernel-client.js +560 -0
- package/extensions/services/{evol/static/js/kernel-client.js → kite_console/static/js/kernel-client.js.backup} +41 -3
- package/extensions/services/{evol → kite_console}/static/js/registry-tests.js +7 -0
- package/extensions/services/kite_console/static/js/tests/ARCHITECTURE.md +67 -0
- package/extensions/services/kite_console/static/js/tests/README.md +140 -0
- package/extensions/services/kite_console/static/js/tests/index.js +161 -0
- package/extensions/services/kite_console/static/js/tests/integration/auth.js +120 -0
- package/extensions/services/kite_console/static/js/tests/integration/channel-interaction.js +188 -0
- package/extensions/services/kite_console/static/js/tests/integration/elastic-connection.js +115 -0
- package/extensions/services/kite_console/static/js/tests/integration/full-workflow.js +43 -0
- package/extensions/services/kite_console/static/js/tests/integration/multi-instance.js +304 -0
- package/extensions/services/kite_console/static/js/tests/integration/nested-rpc.js +266 -0
- package/extensions/services/kite_console/static/js/tests/integration/pingpong.js +25 -0
- package/extensions/services/kite_console/static/js/tests/integration/redis.js +227 -0
- package/extensions/services/kite_console/static/js/tests/integration/registry-core.js +52 -0
- package/extensions/services/kite_console/static/js/tests/integration/remote-deploy.js +85 -0
- package/extensions/services/kite_console/static/js/tests/integration/require-init.js +96 -0
- package/extensions/services/kite_console/static/js/tests/integration/scaling-control.js +193 -0
- package/extensions/services/kite_console/static/js/tests/integration/trace.js +109 -0
- package/extensions/services/kite_console/static/js/tests/modules/acp_channel.js +339 -0
- package/extensions/services/kite_console/static/js/tests/modules/auth.js +96 -0
- package/extensions/services/kite_console/static/js/tests/modules/backup.js +49 -0
- package/extensions/services/kite_console/static/js/tests/modules/gateway.js +41 -0
- package/extensions/services/kite_console/static/js/tests/modules/kernel.js +90 -0
- package/extensions/services/kite_console/static/js/tests/modules/launcher.js +75 -0
- package/extensions/services/kite_console/static/js/tests/modules/multi_instance.js +129 -0
- package/extensions/services/kite_console/static/js/tests/modules/phone_channel.js +364 -0
- package/extensions/services/kite_console/static/js/tests/modules/redis.js +178 -0
- package/extensions/services/kite_console/static/js/tests/modules/watchdog.js +60 -0
- package/extensions/services/kite_console/static/js/tests/modules/web.js +70 -0
- package/extensions/services/kite_console/static/js/tests/test-runner.js +123 -0
- package/extensions/services/kite_console/static/js/virtual-list.js +200 -0
- package/extensions/services/kite_console/static/test_kernel_client_token.html +352 -0
- package/extensions/services/kite_console/stats_manager.py +247 -0
- package/extensions/services/logs/README.md +215 -0
- package/extensions/services/logs/api_logger.py +37 -0
- package/extensions/services/logs/baseline.py +121 -0
- package/extensions/services/logs/cleaner.py +76 -0
- package/extensions/services/logs/entry.py +449 -0
- package/extensions/services/logs/formatter.py +129 -0
- package/extensions/services/logs/module.md +38 -0
- package/extensions/services/logs/quick_diagnostic.py +128 -0
- package/extensions/services/logs/routes/__init__.py +1 -0
- package/extensions/services/logs/routes/routes_logs.py +218 -0
- package/extensions/services/logs/routes/routes_logs.py.backup +173 -0
- package/extensions/services/logs/scanner.py +100 -0
- package/extensions/services/logs/searcher.py +263 -0
- package/extensions/services/logs/server.py +553 -0
- package/extensions/services/logs.zip +0 -0
- package/extensions/services/model_service/config.json5 +30 -0
- package/extensions/services/model_service/entry.py +620 -171
- package/extensions/services/model_service/module.md +11 -2
- package/extensions/services/proxy/__init__.py +0 -0
- package/extensions/services/proxy/aid_manager.py +419 -0
- package/extensions/services/proxy/auth_bridge.py +182 -0
- package/extensions/services/proxy/config_store.py +79 -0
- package/extensions/services/proxy/entry.py +528 -0
- package/extensions/services/proxy/evol/presenter/agentIdPresenter.py +2 -2
- package/extensions/services/proxy/evol/presenter/apikeyPresenter.py +18 -28
- package/extensions/services/proxy/evol/presenter/configPresenter.py +80 -1127
- package/extensions/services/proxy/evol/presenter/userPresenter.py +71 -477
- package/extensions/services/proxy/evol/server/claude_proxy_async.py +11 -7
- package/extensions/services/proxy/module.md +151 -0
- package/extensions/services/proxy/server.py +952 -271
- package/extensions/services/redis/ALIGNMENT_CHECKLIST.md +121 -0
- package/extensions/services/redis/ALIGNMENT_STATUS.md +548 -0
- package/extensions/services/redis/config.json5 +8 -0
- package/extensions/services/redis/entry.py +1509 -0
- package/extensions/services/redis/entry.py.backup +405 -0
- package/extensions/services/redis/module.md +48 -0
- package/extensions/services/redis/redis_builtin.py +332 -0
- package/extensions/services/redis/redis_external.py +164 -0
- package/extensions/services/testUi/entry.py +446 -0
- package/extensions/services/testUi/module.md +18 -0
- package/extensions/services/testUi/ui/cards.html +131 -0
- package/extensions/services/testUi/ui/index.html +22 -0
- package/extensions/services/testUi/ui/particles.html +143 -0
- package/extensions/services/watchdog/entry.py +1258 -793
- package/extensions/services/watchdog/module.md +2 -0
- package/extensions/services/watchdog/monitor.py +465 -87
- package/extensions/services/web/auth_manager.py +602 -0
- package/extensions/services/web/config.json5 +11 -0
- package/extensions/services/web/entry.py +598 -478
- package/extensions/services/web/mfa_totp.py +77 -0
- package/extensions/services/web/module.md +16 -13
- package/extensions/services/web/nonce_pool.py +113 -0
- package/extensions/services/web/oauth_manager.py +223 -0
- package/extensions/services/web/pairing.py +3 -2
- package/extensions/services/web/pairing_codes.jsonl +1 -0
- package/extensions/services/web/relay.py +442 -63
- package/extensions/services/web/relay_config.json5 +1 -2
- package/extensions/services/web/routes/routes_rpc.py +6 -6
- package/extensions/services/web/server.py +360 -173
- package/extensions/services/web/static/index.html +1752 -1738
- package/extensions/services/web/static/js/app.js +32 -0
- package/extensions/services/web/static/js/kernel-client.js +48 -9
- package/extensions/services/web/vendor/bluetooth/audio.py +1 -1
- package/extensions/services/web/vendor/config.py +2 -2
- package/extensions/services/web/vendor/storage/identity.py +1 -1
- package/kernel/entry.py +77 -23
- package/kernel/event_hub.py +1122 -74
- package/kernel/module.md +2 -1
- package/kernel/registry_store.py +208 -11
- package/kernel/rpc_router.py +1400 -491
- package/kernel/server.py +1021 -134
- package/kite_cli/__init__.py +9 -1
- package/kite_cli/builders/__init__.py +4 -0
- package/kite_cli/builders/base.py +67 -0
- package/kite_cli/builders/custom.py +31 -0
- package/kite_cli/builders/detector.py +56 -0
- package/kite_cli/builders/go.py +34 -0
- package/kite_cli/builders/gradle.py +41 -0
- package/kite_cli/builders/maven.py +36 -0
- package/kite_cli/builders/npm.py +44 -0
- package/kite_cli/builders/python.py +37 -0
- package/kite_cli/commands/BUILD_GUIDE.md +109 -0
- package/kite_cli/commands/build.py +142 -0
- package/kite_cli/commands/check.py +60 -0
- package/kite_cli/commands/config.py +156 -0
- package/kite_cli/commands/deps.py +58 -0
- package/kite_cli/commands/deps_install.py +7 -7
- package/kite_cli/commands/disable.py +162 -0
- package/kite_cli/commands/enable.py +162 -0
- package/kite_cli/commands/export.py +96 -0
- package/kite_cli/commands/import_cmd.py +110 -0
- package/kite_cli/commands/install.py +50 -23
- package/kite_cli/commands/install_skill.py +107 -0
- package/kite_cli/commands/list.py +128 -31
- package/kite_cli/commands/outdated.py +202 -0
- package/kite_cli/commands/search.py +33 -17
- package/kite_cli/commands/update.py +115 -2
- package/kite_cli/commands/venv_setup.py +6 -6
- package/kite_cli/commands/why.py +48 -0
- package/kite_cli/core/config_manager.py +145 -0
- package/kite_cli/core/downloader.py +32 -2
- package/kite_cli/main.py +153 -7
- package/kite_cli/utils/colors.py +153 -0
- package/kite_cli/utils/dependency_graph.py +209 -0
- package/kite_cli/utils/process.py +55 -0
- package/kite_cli/utils/progress.py +207 -0
- package/kite_cli/utils/table.py +101 -0
- package/launcher/count_lines.py +192 -43
- package/launcher/entry.py +4543 -2802
- package/launcher/logging_setup.py +54 -1
- package/launcher/module.md +32 -6
- package/launcher/module_scanner.py +93 -20
- package/launcher/process_manager.py +355 -76
- package/main.py +6 -0
- package/package.json +4 -1
- package/requirements.txt +41 -38
- package/scripts/auto-fix-deps.py +128 -0
- package/scripts/env-manager.js +25 -2
- package/scripts/final-test.js +78 -0
- package/scripts/setup-python-env.js +700 -191
- package/scripts/test-alluser.js +48 -0
- package/scripts/test-different-version.js +86 -0
- package/scripts/test-direct.js +63 -0
- package/scripts/test-extract-installer.js +28 -0
- package/scripts/test-install-log.js +54 -0
- package/scripts/test-installer.js +39 -0
- package/scripts/test-integration.js +250 -0
- package/scripts/test-real-install.js +210 -0
- package/scripts/test-targetdir.js +49 -0
- package/scripts/test-venv-real.js +47 -0
- package/scripts/test-venv-simple.js +57 -0
- package/scripts/test-wait.js +49 -0
- package/scripts/test-with-log.js +63 -0
- package/extensions/services/evol/config.yaml +0 -149
- package/extensions/services/evol/routes/routes_management_ws.py +0 -127
- package/extensions/services/evol/static/index_evol.html +0 -14
- package/extensions/services/evol/static/js/app.js +0 -6304
- package/extensions/services/evol/static/js/auth.js +0 -326
- package/extensions/services/evol/static/js/evol-app-fixed.js +0 -50
- package/extensions/services/evol/static/js/evol-app.js.bak +0 -1800
- package/extensions/services/evol/static/js/kernel-client-example.js +0 -228
- package/extensions/services/evol/static/js/main.js +0 -141
- package/extensions/services/evol/static/js/stats.js +0 -217
- package/extensions/services/evol/static/js/token-manager.js +0 -175
- package/extensions/services/proxy/CHANGELOG_20260308.md +0 -258
- package/extensions/services/proxy/_fix_prints.py +0 -133
- package/extensions/services/proxy/_fix_prints2.py +0 -87
- package/extensions/services/proxy/console_auth.py +0 -109
- package/extensions/services/proxy/logs/websocket.log +0 -260
- package/extensions/services/proxy/main.py +0 -240
- package/extensions/services/proxy/requirements.txt +0 -13
- package/extensions/services/web/config.yaml +0 -149
- /package/extensions/services/{evol → kite_console}/static/pairing.html +0 -0
- /package/extensions/services/{evol → kite_console}/static/test_registry.html +0 -0
- /package/extensions/services/{evol → kite_console}/static/test_relay.html +0 -0
|
@@ -18,7 +18,9 @@ SYSTEM_BROADCAST_EVENTS = {
|
|
|
18
18
|
"module.ready", "module.registered", "module.started", "module.stopped",
|
|
19
19
|
"module.crashed", "module.exiting", "module.offline",
|
|
20
20
|
"module.shutdown.ack", "module.shutdown.ready",
|
|
21
|
+
"module.degraded", "module.recovered",
|
|
21
22
|
"system.ready", "registry.updated",
|
|
23
|
+
"system.instance.started", "system.instance.stopped",
|
|
22
24
|
}
|
|
23
25
|
|
|
24
26
|
# Module health states
|
|
@@ -35,10 +37,10 @@ CRITICAL = "critical"
|
|
|
35
37
|
class ModuleStatus:
|
|
36
38
|
"""Track health and resource state for a single module."""
|
|
37
39
|
|
|
38
|
-
def __init__(self, module_id: str,
|
|
40
|
+
def __init__(self, module_id: str, base_url: str, health_path: str, pid: int = None):
|
|
39
41
|
self.module_id = module_id
|
|
40
|
-
self.
|
|
41
|
-
self.
|
|
42
|
+
self.base_url = base_url
|
|
43
|
+
self.health_path = health_path
|
|
42
44
|
self.pid = pid
|
|
43
45
|
# Health check state
|
|
44
46
|
self.state: str = UNKNOWN
|
|
@@ -65,9 +67,13 @@ class HealthMonitor:
|
|
|
65
67
|
# Thresholds
|
|
66
68
|
HEALTH_TIMEOUT = 5 # HTTP timeout per health check
|
|
67
69
|
FAIL_THRESHOLD = 3 # consecutive failures before restart
|
|
68
|
-
|
|
70
|
+
MAX_STARTUP_RETRIES = 5 # 启动失败(未到 ready)最多重试 5 次
|
|
69
71
|
ALERT_AFTER_RESTARTS = 2 # publish alert after this many restarts
|
|
70
72
|
|
|
73
|
+
# Crash restart policy
|
|
74
|
+
MAX_RESTART_DELAY = 600 # 最大重启延迟:10分钟
|
|
75
|
+
STABLE_RUN_TIME = 600 # 稳定运行时间:10分钟(超过此时间才重置崩溃计数)
|
|
76
|
+
|
|
71
77
|
# Resource thresholds
|
|
72
78
|
MEMORY_WARNING = 70 # memory_percent > 70% → warning
|
|
73
79
|
MEMORY_CRITICAL = 85 # memory_percent > 85% → critical
|
|
@@ -84,9 +90,12 @@ class HealthMonitor:
|
|
|
84
90
|
self.kernel_port = kernel_port
|
|
85
91
|
self.publish_event = publish_event # async callable(event_dict)
|
|
86
92
|
self.rpc_call = None # set by entry.py: async callable(method, params)
|
|
93
|
+
self.request_exit = None # set by entry.py: callable(exit_code) — 请求进程退出
|
|
87
94
|
self.modules: dict[str, ModuleStatus] = {}
|
|
88
95
|
self._running = False
|
|
89
96
|
self._psutil = None # lazy import
|
|
97
|
+
self._dirty = False # 模块上下线标记,定时器到期时检查
|
|
98
|
+
self._first_check_done = False # 首次检查是否完成
|
|
90
99
|
|
|
91
100
|
# Restart decision state (module.exiting / module.stopped / module.ready)
|
|
92
101
|
self._exit_intents: dict[str, str] = {} # module_id -> action from module.exiting
|
|
@@ -96,7 +105,11 @@ class HealthMonitor:
|
|
|
96
105
|
self._system_shutting_down = False
|
|
97
106
|
self._system_ready = False
|
|
98
107
|
self._system_ready_event = asyncio.Event()
|
|
108
|
+
self._monitor_enabled = False # 监控开关,由 Launcher 通过 RPC 控制
|
|
99
109
|
self._crash_counts: dict[str, int] = {} # module_id -> consecutive crash count
|
|
110
|
+
self._restart_tasks: dict[str, asyncio.Task] = {} # module_id -> pending restart task
|
|
111
|
+
self._module_ready_times: dict[str, float] = {} # module_id -> last ready timestamp
|
|
112
|
+
self._module_ever_ready: dict[str, bool] = {} # module_id -> has ever been ready
|
|
100
113
|
|
|
101
114
|
# Launcher loss tracking
|
|
102
115
|
self._launcher_offline = False
|
|
@@ -104,6 +117,23 @@ class HealthMonitor:
|
|
|
104
117
|
self._launcher_restart_requested = False # True if launcher requested restart
|
|
105
118
|
self._launcher_startup_info = None # Startup info from launcher (python, argv, cwd, env)
|
|
106
119
|
|
|
120
|
+
# 降级状态跟踪:module_id -> {level, reason, degraded_at, ...}
|
|
121
|
+
self._degraded_modules: dict[str, dict] = {}
|
|
122
|
+
# 超长降级检测间隔(秒)
|
|
123
|
+
self._degraded_timeout = 300 # 5 分钟
|
|
124
|
+
|
|
125
|
+
# 健康事件持久化
|
|
126
|
+
self._health_events_dir = ""
|
|
127
|
+
self._health_events_path = ""
|
|
128
|
+
self._health_events_max = 10000 # 单文件最大条数
|
|
129
|
+
instance_dir = os.environ.get("KITE_INSTANCE_DIR", "")
|
|
130
|
+
if instance_dir:
|
|
131
|
+
self._health_events_dir = os.path.join(instance_dir, "health")
|
|
132
|
+
os.makedirs(self._health_events_dir, exist_ok=True)
|
|
133
|
+
self._health_events_path = os.path.join(self._health_events_dir, "events.jsonl")
|
|
134
|
+
# RPC 调用 Kernel 的回调(由 entry.py 设置)
|
|
135
|
+
self.rpc_call_kernel = None # async callable(method, params) → response
|
|
136
|
+
|
|
107
137
|
# ── Module discovery ──
|
|
108
138
|
|
|
109
139
|
async def discover_modules(self):
|
|
@@ -123,41 +153,53 @@ class HealthMonitor:
|
|
|
123
153
|
print(f"[watchdog] Launcher RPC failed: {e}")
|
|
124
154
|
return
|
|
125
155
|
|
|
126
|
-
# Step 2: Get health
|
|
127
|
-
health_map = {} # name -> {
|
|
156
|
+
# Step 2: Get health info from Registry via RPC
|
|
157
|
+
health_map = {} # name -> {base_url, health_path}
|
|
128
158
|
try:
|
|
129
|
-
|
|
159
|
+
# 查 base_url:有 base_url 的模块才有 HTTP 服务
|
|
160
|
+
resp = await self.rpc_call("registry.lookup", {"field": "base_url"})
|
|
130
161
|
result = resp.get("result", {})
|
|
131
162
|
for entry in result.get("results", []):
|
|
132
163
|
mid = entry.get("module", "")
|
|
133
164
|
if mid in monitored:
|
|
134
165
|
health_map[mid] = {
|
|
135
|
-
"
|
|
136
|
-
"
|
|
166
|
+
"base_url": entry.get("value", ""),
|
|
167
|
+
"health_path": "/health",
|
|
137
168
|
}
|
|
138
169
|
except Exception:
|
|
139
170
|
pass
|
|
140
171
|
|
|
172
|
+
try:
|
|
173
|
+
# 查 health_path:覆盖默认值
|
|
174
|
+
resp = await self.rpc_call("registry.lookup", {"field": "health_path"})
|
|
175
|
+
result = resp.get("result", {})
|
|
176
|
+
for entry in result.get("results", []):
|
|
177
|
+
mid = entry.get("module", "")
|
|
178
|
+
if mid in health_map:
|
|
179
|
+
health_map[mid]["health_path"] = entry.get("value", "/health")
|
|
180
|
+
except Exception:
|
|
181
|
+
pass
|
|
182
|
+
|
|
141
183
|
# Step 3: Sync module list
|
|
142
184
|
seen = set()
|
|
143
185
|
for mid, pid in monitored.items():
|
|
144
186
|
seen.add(mid)
|
|
145
187
|
h = health_map.get(mid, {})
|
|
146
|
-
ep = h.get("
|
|
147
|
-
hp = h.get("
|
|
188
|
+
ep = h.get("base_url", "")
|
|
189
|
+
hp = h.get("health_path", "/health")
|
|
148
190
|
if mid not in self.modules:
|
|
149
191
|
self.modules[mid] = ModuleStatus(
|
|
150
192
|
module_id=mid,
|
|
151
|
-
|
|
152
|
-
|
|
193
|
+
base_url=ep,
|
|
194
|
+
health_path=hp,
|
|
153
195
|
pid=pid,
|
|
154
196
|
)
|
|
155
197
|
else:
|
|
156
198
|
self.modules[mid].pid = pid
|
|
157
199
|
# Refresh endpoints once Registry has them
|
|
158
200
|
if ep:
|
|
159
|
-
self.modules[mid].
|
|
160
|
-
self.modules[mid].
|
|
201
|
+
self.modules[mid].base_url = ep
|
|
202
|
+
self.modules[mid].health_path = hp
|
|
161
203
|
|
|
162
204
|
for mid in list(self.modules):
|
|
163
205
|
if mid not in seen:
|
|
@@ -167,7 +209,7 @@ class HealthMonitor:
|
|
|
167
209
|
|
|
168
210
|
async def _check_one(self, status: ModuleStatus):
|
|
169
211
|
"""Check a single module's health via RPC."""
|
|
170
|
-
if not status.
|
|
212
|
+
if not status.base_url:
|
|
171
213
|
return # Not yet registered in Registry, will be picked up on next discover
|
|
172
214
|
|
|
173
215
|
status.last_check = time.time()
|
|
@@ -219,9 +261,10 @@ class HealthMonitor:
|
|
|
219
261
|
"error": status.last_error,
|
|
220
262
|
})
|
|
221
263
|
|
|
222
|
-
# Restart if threshold reached
|
|
264
|
+
# Restart if threshold reached (but NOT if module is degraded — degraded ≠ crashed)
|
|
223
265
|
if (status.fail_count >= self.FAIL_THRESHOLD
|
|
224
|
-
and status.restarted_count < self.MAX_RESTARTS
|
|
266
|
+
and status.restarted_count < self.MAX_RESTARTS
|
|
267
|
+
and not self.is_degraded(status.module_id)):
|
|
225
268
|
await self._restart_module(status)
|
|
226
269
|
|
|
227
270
|
# ── Restart via Launcher API ──
|
|
@@ -240,6 +283,10 @@ class HealthMonitor:
|
|
|
240
283
|
status.restarted_count += 1
|
|
241
284
|
status.fail_count = 0
|
|
242
285
|
print(f"[watchdog] {mid} restart requested")
|
|
286
|
+
await self._persist_health_event("watchdog.module.restarted", {
|
|
287
|
+
"module": mid, "restarted_count": status.restarted_count,
|
|
288
|
+
"reason": "resource_critical" if status.resource_state == CRITICAL else "restart",
|
|
289
|
+
})
|
|
243
290
|
if status.restarted_count >= self.ALERT_AFTER_RESTARTS:
|
|
244
291
|
await self._publish("watchdog.alert", {
|
|
245
292
|
"module_id": mid,
|
|
@@ -253,10 +300,8 @@ class HealthMonitor:
|
|
|
253
300
|
|
|
254
301
|
# ── Resource monitoring ──
|
|
255
302
|
|
|
256
|
-
def
|
|
257
|
-
"""
|
|
258
|
-
if not status.pid:
|
|
259
|
-
return None
|
|
303
|
+
def _collect_metrics_sync(self, pid: int) -> dict | None:
|
|
304
|
+
"""Synchronous metrics collection — runs in thread pool to avoid blocking event loop."""
|
|
260
305
|
if not self._psutil:
|
|
261
306
|
try:
|
|
262
307
|
import psutil
|
|
@@ -264,11 +309,11 @@ class HealthMonitor:
|
|
|
264
309
|
except ImportError:
|
|
265
310
|
return None
|
|
266
311
|
try:
|
|
267
|
-
proc = self._psutil.Process(
|
|
312
|
+
proc = self._psutil.Process(pid)
|
|
268
313
|
return {
|
|
269
314
|
"memory_rss": round(proc.memory_info().rss / 1024 / 1024, 1),
|
|
270
315
|
"memory_percent": round(proc.memory_percent(), 1),
|
|
271
|
-
"cpu_percent": round(proc.cpu_percent(interval=0
|
|
316
|
+
"cpu_percent": round(proc.cpu_percent(interval=0), 1),
|
|
272
317
|
"thread_count": proc.num_threads(),
|
|
273
318
|
}
|
|
274
319
|
except Exception:
|
|
@@ -276,7 +321,10 @@ class HealthMonitor:
|
|
|
276
321
|
|
|
277
322
|
async def _check_resources(self, status: ModuleStatus):
|
|
278
323
|
"""Collect metrics and evaluate resource state."""
|
|
279
|
-
|
|
324
|
+
if not status.pid:
|
|
325
|
+
return
|
|
326
|
+
loop = asyncio.get_event_loop()
|
|
327
|
+
metrics = await loop.run_in_executor(None, self._collect_metrics_sync, status.pid)
|
|
280
328
|
if not metrics:
|
|
281
329
|
return
|
|
282
330
|
status.last_metrics = metrics
|
|
@@ -333,6 +381,11 @@ class HealthMonitor:
|
|
|
333
381
|
await self._publish("watchdog.module.resource_warning", {
|
|
334
382
|
"module_id": status.module_id, "metrics": metrics,
|
|
335
383
|
})
|
|
384
|
+
# 资源告警 → 代发降级(level: slow)
|
|
385
|
+
await self._report_degraded_to_kernel(
|
|
386
|
+
status.module_id, "slow", "resource_pressure",
|
|
387
|
+
detail=f"CPU {metrics.get('cpu_percent', 0)}%, MEM {metrics.get('memory_percent', 0)}%"
|
|
388
|
+
)
|
|
336
389
|
else: # NORMAL
|
|
337
390
|
status.resource_warning_count = 0
|
|
338
391
|
status.resource_critical_count = 0
|
|
@@ -345,6 +398,11 @@ class HealthMonitor:
|
|
|
345
398
|
await self._publish("watchdog.module.resource_recovered", {
|
|
346
399
|
"module_id": status.module_id,
|
|
347
400
|
})
|
|
401
|
+
# 资源恢复 → 代发恢复(仅当是 Watchdog 代发的降级时)
|
|
402
|
+
if self.is_degraded(status.module_id):
|
|
403
|
+
deg_info = self._degraded_modules.get(status.module_id, {})
|
|
404
|
+
if deg_info.get("reason") == "resource_pressure":
|
|
405
|
+
await self._report_recovered_to_kernel(status.module_id)
|
|
348
406
|
|
|
349
407
|
async def _resource_restart(self, status: ModuleStatus):
|
|
350
408
|
"""Restart module due to resource critical timeout."""
|
|
@@ -385,7 +443,8 @@ class HealthMonitor:
|
|
|
385
443
|
if event_type == "system.ready":
|
|
386
444
|
print("[watchdog] Received system.ready")
|
|
387
445
|
self._system_ready = True
|
|
388
|
-
self.
|
|
446
|
+
await self._persist_health_event("system.ready", {})
|
|
447
|
+
# 不再自动启动监控,等待 Launcher 通过 RPC 调用 start_monitor
|
|
389
448
|
return
|
|
390
449
|
|
|
391
450
|
# module.offline — track launcher state
|
|
@@ -413,22 +472,25 @@ class HealthMonitor:
|
|
|
413
472
|
if not module_id or module_id == "watchdog":
|
|
414
473
|
# Handle registry.updated (no module_id)
|
|
415
474
|
if event_type == "registry.updated":
|
|
416
|
-
|
|
417
|
-
asyncio.create_task(self.discover_modules())
|
|
475
|
+
self._dirty = True # 标记,等定时器到期时重新获取
|
|
418
476
|
return
|
|
419
477
|
|
|
420
478
|
if event_type == "module.started":
|
|
421
479
|
print(f"[watchdog] Received module.started: {module_id}")
|
|
422
480
|
self._crash_counts.pop(module_id, None)
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
asyncio.create_task(self.discover_modules())
|
|
481
|
+
await self._persist_health_event("module.started", {"module": module_id})
|
|
482
|
+
self._dirty = True # 标记,等定时器到期时获取新模块信息
|
|
426
483
|
|
|
427
484
|
elif event_type == "module.stopped":
|
|
428
485
|
print(f"[watchdog] Received module.stopped: {module_id}")
|
|
429
|
-
self.modules.pop(module_id, None)
|
|
430
|
-
#
|
|
431
|
-
|
|
486
|
+
self.modules.pop(module_id, None) # 立即从监控列表移除
|
|
487
|
+
self._degraded_modules.pop(module_id, None) # 清除降级状态
|
|
488
|
+
await self._persist_health_event("module.stopped", {
|
|
489
|
+
"module": module_id,
|
|
490
|
+
"exit_code": data.get("exit_code"),
|
|
491
|
+
"stop_type": data.get("stop_type"),
|
|
492
|
+
})
|
|
493
|
+
self._dirty = True # 标记,等定时器到期时同步清单
|
|
432
494
|
await self._handle_module_stopped(module_id, data)
|
|
433
495
|
|
|
434
496
|
elif event_type == "module.exiting":
|
|
@@ -460,6 +522,29 @@ class HealthMonitor:
|
|
|
460
522
|
self._launcher_offline = False
|
|
461
523
|
self._launcher_had_exiting = False
|
|
462
524
|
|
|
525
|
+
# 记录模块 ready 时间和状态
|
|
526
|
+
self._module_ready_times[module_id] = time.time()
|
|
527
|
+
self._module_ever_ready[module_id] = True
|
|
528
|
+
|
|
529
|
+
# 取消 pending 的重启任务(如果有)
|
|
530
|
+
if module_id in self._restart_tasks:
|
|
531
|
+
task = self._restart_tasks.pop(module_id)
|
|
532
|
+
if not task.done():
|
|
533
|
+
task.cancel()
|
|
534
|
+
|
|
535
|
+
# 检查是否稳定运行超过 STABLE_RUN_TIME,如果是则重置崩溃计数
|
|
536
|
+
# 注意:这里不立即重置,而是在下次崩溃时检查运行时长
|
|
537
|
+
|
|
538
|
+
elif event_type == "module.degraded":
|
|
539
|
+
await self._handle_module_degraded(module_id, data)
|
|
540
|
+
|
|
541
|
+
elif event_type == "module.recovered":
|
|
542
|
+
await self._handle_module_recovered(module_id, data)
|
|
543
|
+
|
|
544
|
+
elif event_type in ("system.instance.started", "system.instance.stopped"):
|
|
545
|
+
# 实例扩缩容事件:标记需要重新发现,下次检查周期自动更新监控列表
|
|
546
|
+
self.mark_dirty()
|
|
547
|
+
|
|
463
548
|
# Layer 2: 忽略系统广播事件
|
|
464
549
|
elif event_type in SYSTEM_BROADCAST_EVENTS:
|
|
465
550
|
pass
|
|
@@ -472,15 +557,23 @@ class HealthMonitor:
|
|
|
472
557
|
"""Restart decision engine — called when module.stopped is received.
|
|
473
558
|
|
|
474
559
|
Priority:
|
|
560
|
+
0. Monitor disabled → no restart (Launcher controls module lifecycle)
|
|
475
561
|
1. System shutting down → no restart
|
|
476
562
|
2. stop_type == "graceful_stop" → Launcher主动停止,不重启
|
|
477
|
-
3.
|
|
478
|
-
4. stop_type == "process_exit" +
|
|
563
|
+
3. Launcher 在 stopped 事件中标记 restart=False + exit_type=graceful → 不重启(兜底)
|
|
564
|
+
4. stop_type == "process_exit" + has exit_intent → 按 intent 处理
|
|
565
|
+
5. stop_type == "process_exit" + no intent → 崩溃,重启
|
|
479
566
|
"""
|
|
480
567
|
# Sync graceful_shutdown from Launcher (covers missed module.ready)
|
|
481
568
|
if "graceful_shutdown" in data:
|
|
482
569
|
self._graceful_modules[module_id] = bool(data["graceful_shutdown"])
|
|
483
570
|
|
|
571
|
+
# 监控未开启时,不做任何重启决策(启动阶段由 Launcher 管理)
|
|
572
|
+
if not self._monitor_enabled:
|
|
573
|
+
print(f"[watchdog] {module_id} stopped, but monitoring is disabled — skipping restart")
|
|
574
|
+
self._exit_intents.pop(module_id, None)
|
|
575
|
+
return
|
|
576
|
+
|
|
484
577
|
if self._system_shutting_down:
|
|
485
578
|
print(f"[watchdog] {module_id} stopped during shutdown, skipping restart")
|
|
486
579
|
return
|
|
@@ -496,6 +589,13 @@ class HealthMonitor:
|
|
|
496
589
|
self._exit_intents.pop(module_id, None) # 清理可能残留的intent
|
|
497
590
|
return
|
|
498
591
|
|
|
592
|
+
# 兜底:检查 Launcher 在 module.stopped 事件中的决策(防止 module.exiting 因竞态丢失)
|
|
593
|
+
if data.get("restart") is False and data.get("exit_type") == "graceful":
|
|
594
|
+
reason = data.get("reason", "unknown")
|
|
595
|
+
print(f"[watchdog] {module_id} exited gracefully (reason={reason}), no restart (Launcher decision)")
|
|
596
|
+
self._exit_intents.pop(module_id, None)
|
|
597
|
+
return
|
|
598
|
+
|
|
499
599
|
# stop_type == "process_exit": 进程自行退出,需要判断是否崩溃
|
|
500
600
|
intent = self._exit_intents.pop(module_id, None)
|
|
501
601
|
if intent is not None:
|
|
@@ -517,19 +617,77 @@ class HealthMonitor:
|
|
|
517
617
|
return
|
|
518
618
|
|
|
519
619
|
# No exit intent → treat as crash
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
620
|
+
# 如果已有 pending 的重启任务,取消它(避免重复重启)
|
|
621
|
+
if module_id in self._restart_tasks:
|
|
622
|
+
existing_task = self._restart_tasks[module_id]
|
|
623
|
+
if not existing_task.done():
|
|
624
|
+
print(f"[watchdog] {module_id} already has a pending restart, skipping duplicate")
|
|
625
|
+
return
|
|
626
|
+
|
|
627
|
+
# 检查模块是否曾经 ready 过
|
|
628
|
+
ever_ready = self._module_ever_ready.get(module_id, False)
|
|
629
|
+
|
|
630
|
+
if ever_ready:
|
|
631
|
+
# 运行时崩溃:检查是否稳定运行超过 STABLE_RUN_TIME
|
|
632
|
+
last_ready_time = self._module_ready_times.get(module_id, 0)
|
|
633
|
+
run_time = time.time() - last_ready_time
|
|
523
634
|
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
635
|
+
if run_time >= self.STABLE_RUN_TIME:
|
|
636
|
+
# 稳定运行超过 10 分钟,重置崩溃计数
|
|
637
|
+
print(f"[watchdog] {module_id} ran stably for {run_time:.0f}s, resetting crash count")
|
|
638
|
+
self._crash_counts[module_id] = 0
|
|
639
|
+
|
|
640
|
+
# 运行时崩溃:无限重试,使用指数退避
|
|
641
|
+
self._crash_counts[module_id] = self._crash_counts.get(module_id, 0) + 1
|
|
642
|
+
count = self._crash_counts[module_id]
|
|
643
|
+
exit_code = data.get("exit_code", -1)
|
|
644
|
+
|
|
645
|
+
# 指数退避:1秒、2秒、4秒、8秒...最大10分钟
|
|
646
|
+
delay = min(2 ** (count - 1), self.MAX_RESTART_DELAY)
|
|
647
|
+
print(f"\033[94m[watchdog] {module_id} crashed (exit_code={exit_code}), will restart in {delay}s (crash #{count}, runtime crash)\033[0m")
|
|
648
|
+
|
|
649
|
+
# 创建延迟重启任务
|
|
650
|
+
restart_task = asyncio.create_task(self._delayed_restart(module_id, delay, "runtime_crash", count))
|
|
651
|
+
self._restart_tasks[module_id] = restart_task
|
|
527
652
|
else:
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
653
|
+
# 启动失败(未到 ready):最多重试 5 次
|
|
654
|
+
self._crash_counts[module_id] = self._crash_counts.get(module_id, 0) + 1
|
|
655
|
+
count = self._crash_counts[module_id]
|
|
656
|
+
exit_code = data.get("exit_code", -1)
|
|
657
|
+
|
|
658
|
+
if count <= self.MAX_STARTUP_RETRIES:
|
|
659
|
+
# 指数退避:1秒、2秒、4秒、8秒、16秒
|
|
660
|
+
delay = 2 ** (count - 1)
|
|
661
|
+
print(f"\033[94m[watchdog] {module_id} failed to start (exit_code={exit_code}), will retry in {delay}s ({count}/{self.MAX_STARTUP_RETRIES})\033[0m")
|
|
662
|
+
|
|
663
|
+
# 创建延迟重启任务
|
|
664
|
+
restart_task = asyncio.create_task(self._delayed_restart(module_id, delay, "startup_failure", count))
|
|
665
|
+
self._restart_tasks[module_id] = restart_task
|
|
666
|
+
else:
|
|
667
|
+
print(f"\033[94m[watchdog] {module_id} failed to start {count} times, giving up\033[0m")
|
|
668
|
+
await self._publish("watchdog.alert", {
|
|
669
|
+
"module_id": module_id,
|
|
670
|
+
"message": f"{module_id} failed to start after {self.MAX_STARTUP_RETRIES} retries",
|
|
671
|
+
})
|
|
672
|
+
|
|
673
|
+
async def _delayed_restart(self, module_id: str, delay: float, crash_type: str, attempt: int):
|
|
674
|
+
"""延迟重启模块(指数退避)
|
|
675
|
+
|
|
676
|
+
Args:
|
|
677
|
+
module_id: 模块名
|
|
678
|
+
delay: 延迟时间(秒)
|
|
679
|
+
crash_type: 崩溃类型 (startup_failure, runtime_crash)
|
|
680
|
+
attempt: 重启尝试次数
|
|
681
|
+
"""
|
|
682
|
+
try:
|
|
683
|
+
await asyncio.sleep(delay)
|
|
684
|
+
# 构造详细的 reason,包含崩溃类型和尝试次数
|
|
685
|
+
reason = f"watchdog_{crash_type}_attempt_{attempt}"
|
|
686
|
+
await self._restart_module_by_id(module_id, reason=reason)
|
|
687
|
+
except asyncio.CancelledError:
|
|
688
|
+
print(f"[watchdog] {module_id} restart cancelled")
|
|
689
|
+
finally:
|
|
690
|
+
self._restart_tasks.pop(module_id, None)
|
|
533
691
|
|
|
534
692
|
|
|
535
693
|
async def _quick_check_launcher_exit(self):
|
|
@@ -544,13 +702,16 @@ class HealthMonitor:
|
|
|
544
702
|
print(f"[watchdog] Launcher 已退出(检测 {(i+1)*0.2:.1f}s),启动新实例")
|
|
545
703
|
self._start_new_instance()
|
|
546
704
|
print("[watchdog] 新实例已启动,watchdog 退出")
|
|
547
|
-
|
|
705
|
+
if self.request_exit:
|
|
706
|
+
self.request_exit(0)
|
|
707
|
+
return
|
|
548
708
|
|
|
549
709
|
# 5s 后仍未退出,强制重启
|
|
550
710
|
print("[watchdog] Launcher 5s 内未退出,强制重启")
|
|
551
711
|
self._start_new_instance()
|
|
552
712
|
print("[watchdog] 新实例已启动,watchdog 退出")
|
|
553
|
-
|
|
713
|
+
if self.request_exit:
|
|
714
|
+
self.request_exit(0)
|
|
554
715
|
|
|
555
716
|
async def _handle_launcher_lost(self):
|
|
556
717
|
"""Handle launcher_lost: decide whether to start a new Kite instance.
|
|
@@ -560,12 +721,12 @@ class HealthMonitor:
|
|
|
560
721
|
"""
|
|
561
722
|
if self._launcher_had_exiting:
|
|
562
723
|
print("[watchdog] Launcher had sent module.exiting before loss → normal exit, following suit")
|
|
563
|
-
sys.exit(0)
|
|
564
724
|
else:
|
|
565
725
|
print("[watchdog] Launcher lost without module.exiting → crash detected, starting new instance")
|
|
566
726
|
self._start_new_instance()
|
|
567
727
|
print("[watchdog] New instance started, exiting")
|
|
568
|
-
|
|
728
|
+
if self.request_exit:
|
|
729
|
+
self.request_exit(0)
|
|
569
730
|
|
|
570
731
|
def _start_new_instance(self):
|
|
571
732
|
"""Start a new Kite instance using saved startup info from launcher."""
|
|
@@ -671,6 +832,150 @@ class HealthMonitor:
|
|
|
671
832
|
except Exception as e:
|
|
672
833
|
print(f"[watchdog] Failed to start new instance: {e}")
|
|
673
834
|
|
|
835
|
+
# ── 降级状态管理 ──
|
|
836
|
+
|
|
837
|
+
async def _handle_module_degraded(self, module_id: str, data: dict):
    """Handle a module.degraded event.

    Stores the degraded state for ``module_id`` (level, reason, affected
    list and the wall-clock time of the event) and appends a
    ``module.degraded`` record to the health-event log. This method itself
    does not request any restart.

    Args:
        module_id: Module that reported the degradation.
        data: Event payload; ``level`` and ``reason`` default to "unknown",
            ``affected`` defaults to an empty list, ``detail`` is optional.
    """
    severity = data.get("level", "unknown")
    cause = data.get("reason", "unknown")
    print(f"[watchdog] Module degraded: {module_id} level={severity} reason={cause}")

    # In-memory state, read back by is_degraded() and the timeout check.
    entry = dict(level=severity, reason=cause)
    entry["affected"] = data.get("affected", [])
    entry["degraded_at"] = time.time()
    self._degraded_modules[module_id] = entry

    # Persist the health event.
    payload = {"module": module_id, "level": severity, "reason": cause}
    payload["detail"] = data.get("detail")
    await self._persist_health_event("module.degraded", payload)
|
|
853
|
+
|
|
854
|
+
async def _handle_module_recovered(self, module_id: str, data: dict):
    """Handle a module.recovered event.

    Removes ``module_id`` from the degraded-module registry and appends a
    ``module.recovered`` record to the health-event log.

    Values supplied in ``data`` always win. When the event omits
    ``previous_level`` or ``duration_ms``, they are filled in from the state
    recorded when the module was marked degraded, so the log line and the
    persisted record are not left with ``None`` for information the
    watchdog already holds.

    Args:
        module_id: Module that reported the recovery.
        data: Event payload; ``previous_level`` and ``duration_ms`` are
            optional.
    """
    prev = self._degraded_modules.pop(module_id, None) or {}

    previous_level = data.get("previous_level")
    if previous_level is None:
        previous_level = prev.get("level")

    duration_ms = data.get("duration_ms")
    if duration_ms is None:
        degraded_at = prev.get("degraded_at")
        if degraded_at:
            # Same millisecond computation as the degraded-timeout check;
            # clamp at zero in case the wall clock stepped backwards.
            duration_ms = max(0, int((time.time() - degraded_at) * 1000))

    print(f"[watchdog] Module recovered: {module_id} (duration={duration_ms}ms)")
    await self._persist_health_event("module.recovered", {
        "module": module_id,
        "previous_level": previous_level,
        "duration_ms": duration_ms,
    })
|
|
864
|
+
|
|
865
|
+
def is_degraded(self, module_id: str) -> bool:
    """Return True when ``module_id`` has an entry in the degraded-module registry."""
    tracked = self._degraded_modules
    return module_id in tracked
|
|
868
|
+
|
|
869
|
+
async def _report_degraded_to_kernel(self, module_id: str, level: str, reason: str, detail: str = None):
    """Report a degradation on a module's behalf via the Kernel RPC.

    Calls ``kernel.report_degraded`` with the module id, level, reason and
    an empty ``affected`` list; ``detail`` is included only when truthy.
    Does nothing when no kernel RPC callable is configured. RPC failures
    are logged and not propagated.

    Args:
        module_id: Module the degradation is reported for.
        level: Degradation level to report.
        reason: Reason to report.
        detail: Optional extra detail.
    """
    if not self.rpc_call_kernel:
        return

    payload = dict(module_id=module_id, level=level, reason=reason, affected=[])
    if detail:
        payload.update(detail=detail)

    try:
        await self.rpc_call_kernel("kernel.report_degraded", payload)
    except Exception as exc:
        print(f"[watchdog] Failed to report degraded for {module_id}: {exc}")
|
|
885
|
+
|
|
886
|
+
async def _report_recovered_to_kernel(self, module_id: str):
    """Report a recovery on a module's behalf via the Kernel RPC.

    Calls ``kernel.report_recovered`` with the module id. Does nothing when
    no kernel RPC callable is configured. RPC failures are logged and not
    propagated.

    Args:
        module_id: Module the recovery is reported for.
    """
    if self.rpc_call_kernel:
        try:
            await self.rpc_call_kernel("kernel.report_recovered", dict(module_id=module_id))
        except Exception as exc:
            print(f"[watchdog] Failed to report recovered for {module_id}: {exc}")
|
|
894
|
+
|
|
895
|
+
async def _check_degraded_timeout(self):
    """Alert on modules that have stayed degraded for too long.

    For every tracked degraded module whose ``degraded_at`` is at least
    ``self._degraded_timeout`` seconds in the past, publishes a
    ``watchdog.module.degraded_timeout`` event and persists a matching
    health record. Each entry is alerted at most once: it is flagged with
    ``_timeout_alerted`` before the alert is sent.
    """
    current = time.time()
    # Iterate over a snapshot so the registry may change while awaiting.
    for name, state in list(self._degraded_modules.items()):
        started = state.get("degraded_at", 0)
        if not started:
            continue
        if not ((current - started) >= self._degraded_timeout):
            continue

        elapsed_ms = int((current - started) * 1000)
        if state.get("_timeout_alerted"):
            # Alert only once per degraded entry.
            continue

        state["_timeout_alerted"] = True
        print(f"[watchdog] Module {name} degraded for {elapsed_ms}ms (>{self._degraded_timeout}s), alerting")
        alert = {
            "module_id": name,
            "level": state.get("level"),
            "reason": state.get("reason"),
            "duration_ms": elapsed_ms,
        }
        await self._publish("watchdog.module.degraded_timeout", alert)
        await self._persist_health_event(
            "watchdog.module.degraded_timeout",
            {"module": name, "duration_ms": elapsed_ms},
        )
|
|
915
|
+
|
|
916
|
+
# ── 健康事件持久化 ──
|
|
917
|
+
|
|
918
|
+
async def _persist_health_event(self, event: str, data: dict):
    """Append a health event to the rolling log and to a per-day archive.

    The record is one JSON line containing a UTC ISO 8601 ``ts``, the
    ``event`` name and every key of ``data``. It is appended to
    ``self._health_events_path`` and to
    ``<self._health_events_dir>/<YYYY-MM>/<YYYY-MM-DD>.jsonl``.

    Persistence is best-effort: any failure, including a payload that
    cannot be serialized to JSON, is logged and never propagated to the
    caller. Does nothing when no health-events directory is configured.

    Args:
        event: Event name stored under the ``event`` key.
        data: Extra fields merged into the record.
    """
    if not self._health_events_dir:
        return
    try:
        # Building and serializing the record happens inside the try block
        # so that a bad payload cannot crash the calling event handler.
        record = {
            "ts": datetime.now(timezone.utc).isoformat(),
            "event": event,
            **data,
        }
        line = json.dumps(record, ensure_ascii=False) + "\n"
        # Append to the current (rolling) file.
        with open(self._health_events_path, "a", encoding="utf-8") as f:
            f.write(line)
        # Append to the per-day archive, grouped into one directory per month.
        # NOTE(review): the archive date uses local time while ``ts`` is UTC,
        # so records near midnight can land in a file whose name differs
        # from their ``ts`` date -- confirm this is intended.
        today = datetime.now().strftime("%Y-%m-%d")
        month_dir = os.path.join(self._health_events_dir, today[:7])
        os.makedirs(month_dir, exist_ok=True)
        daily_path = os.path.join(month_dir, f"{today}.jsonl")
        with open(daily_path, "a", encoding="utf-8") as f:
            f.write(line)
    except Exception as e:
        print(f"[watchdog] Failed to persist health event: {e}")
|
|
941
|
+
|
|
942
|
+
def get_health_events(self, limit: int = 50, since: str = None) -> dict:
    """Query persisted health events, newest first.

    Args:
        limit: Maximum number of events to return (default 50, clamped to
            1..500; a value that cannot be converted to int falls back to 50).
        since: ISO 8601 timestamp; only events strictly after it are
            returned. Timestamps are compared as points in time, so any UTC
            offset or a trailing ``Z`` is accepted; a timestamp without an
            offset is treated as UTC. If either side cannot be parsed, the
            comparison falls back to plain string ordering. A non-string
            value is ignored.

    Returns:
        {"events": [...], "total": int, "has_more": bool} where ``total`` is
        the number of matching events and ``has_more`` tells whether
        ``total`` exceeds ``limit``.
    """
    from collections import deque  # local import keeps the block self-contained

    def _parse_ts(value):
        """Parse an ISO 8601 string into an aware datetime, or return None."""
        if not isinstance(value, str):
            return None
        text = value.strip()
        if text[-1:] in ("Z", "z"):
            # fromisoformat() only accepts the "Z" suffix on newer Pythons.
            text = text[:-1] + "+00:00"
        try:
            parsed = datetime.fromisoformat(text)
        except ValueError:
            return None
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    try:
        limit = int(limit)
    except (TypeError, ValueError):
        limit = 50
    limit = min(max(1, limit), 500)

    path = self._health_events_path
    if not path or not os.path.exists(path):
        return {"events": [], "total": 0, "has_more": False}

    since_text = since if isinstance(since, str) and since else None
    since_dt = _parse_ts(since_text)

    # Keep only the newest ``limit`` records while counting every match,
    # instead of holding the whole file in memory.
    newest = deque(maxlen=limit)
    total = 0
    try:
        with open(path, "r", encoding="utf-8") as f:
            for raw in f:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    record = json.loads(raw)
                except json.JSONDecodeError:
                    continue
                if not isinstance(record, dict):
                    # Valid JSON but not an event object: skip this line
                    # rather than aborting the whole read.
                    continue
                if since_text is not None:
                    ts = record.get("ts", "")
                    ts_dt = _parse_ts(ts) if since_dt is not None else None
                    if ts_dt is not None:
                        if ts_dt <= since_dt:
                            continue
                    elif not isinstance(ts, str) or ts <= since_text:
                        continue
                newest.append(record)
                total += 1
    except (OSError, UnicodeError) as e:
        # Return what was read so far, but do not hide the failure.
        print(f"[watchdog] Failed to read health events: {e}")

    # Newest first.
    events = list(newest)
    events.reverse()
    return {"events": events, "total": total, "has_more": total > limit}
|
|
978
|
+
|
|
674
979
|
async def _restart_module_by_id(self, module_id: str, reason: str = "restart"):
|
|
675
980
|
"""Restart a module via Launcher RPC by module_id."""
|
|
676
981
|
print(f"[watchdog] Requesting restart for {module_id} (reason={reason})")
|
|
@@ -687,16 +992,51 @@ class HealthMonitor:
|
|
|
687
992
|
except Exception as e:
|
|
688
993
|
print(f"[watchdog] {module_id} restart error: {e}")
|
|
689
994
|
|
|
995
|
+
# ── Monitor control (called by Launcher via RPC) ──
|
|
996
|
+
|
|
997
|
+
def start_monitor(self) -> dict:
    """Enable monitoring.

    Intended to be called by the Launcher, which hands module-restart duty
    over to the watchdog. Besides setting the enabled flag, this marks the
    system as ready and sets the ready event that the main loop waits on.

    Returns:
        {"status": "already_enabled"} if monitoring was already on,
        otherwise {"status": "enabled"}.
    """
    already_on = self._monitor_enabled
    if not already_on:
        self._monitor_enabled = True
        # Also flag system.ready so the waiting health-check loop wakes up.
        self._system_ready = True
        self._system_ready_event.set()
        print("[watchdog] 监控已开启(由 Launcher 控制)")
    return {"status": "already_enabled" if already_on else "enabled"}
|
|
1007
|
+
|
|
1008
|
+
def stop_monitor(self) -> dict:
    """Disable monitoring.

    Intended to be called by the Launcher, which takes module-restart duty
    back. Every pending restart task that has not finished is cancelled and
    the restart-task registry is emptied.

    Returns:
        {"status": "already_disabled"} if monitoring was already off,
        otherwise {"status": "disabled"}.
    """
    if not self._monitor_enabled:
        return {"status": "already_disabled"}

    self._monitor_enabled = False
    pending = self._restart_tasks
    # Cancel every restart that is still waiting; iterate over a snapshot.
    for name, job in list(pending.items()):
        if job.done():
            continue
        job.cancel()
        print(f"[watchdog] 取消 {name} 的 pending 重启任务")
    pending.clear()
    print("[watchdog] 监控已关闭(由 Launcher 控制)")
    return {"status": "disabled"}
|
|
1021
|
+
|
|
690
1022
|
# ── Main loop ──
|
|
691
1023
|
|
|
692
1024
|
async def run(self):
|
|
693
|
-
"""Main monitoring loop with
|
|
694
|
-
|
|
1025
|
+
"""Main monitoring loop with grouped timers per resource state.
|
|
1026
|
+
|
|
1027
|
+
机制:
|
|
1028
|
+
- system.ready + start_monitor 后等 15 秒再开始首次检查
|
|
1029
|
+
- 首次检查时查注册中心获取健康接口清单
|
|
1030
|
+
- 3 个状态组(NORMAL/WARNING/CRITICAL)各自独立周期
|
|
1031
|
+
- 单循环维护 3 个 next_check 时间戳,每次 sleep 到最近到期的组
|
|
1032
|
+
- 模块状态变化时自动迁移到对应组
|
|
1033
|
+
- dirty 标记:事件驱动标记 + 定时器驱动获取,防止抖动
|
|
1034
|
+
"""
|
|
695
1035
|
self._running = True
|
|
696
|
-
print("[watchdog] Monitor started, waiting for
|
|
1036
|
+
print("[watchdog] Monitor started, waiting for Launcher to enable monitoring...")
|
|
697
1037
|
|
|
698
|
-
# Wait for
|
|
699
|
-
while self._running and not self.
|
|
1038
|
+
# Wait for Launcher to call start_monitor() via RPC
|
|
1039
|
+
while self._running and not self._monitor_enabled:
|
|
700
1040
|
try:
|
|
701
1041
|
await asyncio.wait_for(self._system_ready_event.wait(), timeout=1.0)
|
|
702
1042
|
except asyncio.TimeoutError:
|
|
@@ -704,45 +1044,82 @@ class HealthMonitor:
|
|
|
704
1044
|
|
|
705
1045
|
if not self._running:
|
|
706
1046
|
return
|
|
707
|
-
print("[watchdog]
|
|
1047
|
+
print("[watchdog] Monitoring enabled, first check in 15s...")
|
|
708
1048
|
|
|
709
|
-
#
|
|
710
|
-
|
|
1049
|
+
# 等 15 秒,给模块充分时间完成注册
|
|
1050
|
+
for _ in range(15):
|
|
1051
|
+
if not self._running:
|
|
1052
|
+
return
|
|
1053
|
+
await asyncio.sleep(1)
|
|
711
1054
|
|
|
712
|
-
#
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
1055
|
+
# 首次发现
|
|
1056
|
+
await self.discover_modules()
|
|
1057
|
+
self._first_check_done = True
|
|
1058
|
+
self._dirty = False
|
|
1059
|
+
print(f"[watchdog] Initial discovery done, monitoring {len(self.modules)} modules")
|
|
1060
|
+
|
|
1061
|
+
# 分组定时器:3 个 next_check 时间戳
|
|
1062
|
+
now = asyncio.get_event_loop().time()
|
|
1063
|
+
next_check = {
|
|
1064
|
+
NORMAL: now, # 立即开始首次检查
|
|
1065
|
+
WARNING: now,
|
|
1066
|
+
CRITICAL: now,
|
|
1067
|
+
}
|
|
1068
|
+
# 上次 discover 时间(定期兜底重发现,5 分钟)
|
|
1069
|
+
last_discovery = now
|
|
1070
|
+
discovery_interval = 300.0
|
|
716
1071
|
|
|
717
1072
|
while self._running:
|
|
718
|
-
if self.modules:
|
|
719
|
-
tasks = []
|
|
720
|
-
for s in self.modules.values():
|
|
721
|
-
tasks.append(self._check_one(s))
|
|
722
|
-
tasks.append(self._check_resources(s))
|
|
723
|
-
await asyncio.gather(*tasks, return_exceptions=True)
|
|
724
|
-
|
|
725
|
-
# Sleep = shortest interval needed by any module
|
|
726
|
-
interval = self._min_interval()
|
|
727
|
-
await asyncio.sleep(interval)
|
|
728
|
-
|
|
729
|
-
# Periodic re-discovery
|
|
730
1073
|
now = asyncio.get_event_loop().time()
|
|
731
|
-
|
|
732
|
-
|
|
1074
|
+
|
|
1075
|
+
# 找到最近到期的时间戳
|
|
1076
|
+
nearest = min(next_check.values())
|
|
1077
|
+
wait = max(0, nearest - now)
|
|
1078
|
+
if wait > 0:
|
|
1079
|
+
await asyncio.sleep(wait)
|
|
1080
|
+
if not self._running:
|
|
1081
|
+
return
|
|
1082
|
+
now = asyncio.get_event_loop().time()
|
|
1083
|
+
|
|
1084
|
+
# dirty 检查:任意定时器到期时,如果有 dirty 标记就重新获取清单
|
|
1085
|
+
if self._dirty:
|
|
733
1086
|
await self.discover_modules()
|
|
734
|
-
|
|
1087
|
+
self._dirty = False
|
|
735
1088
|
last_discovery = now
|
|
736
|
-
|
|
737
|
-
|
|
1089
|
+
|
|
1090
|
+
# 定期兜底重发现(5 分钟)
|
|
1091
|
+
if now - last_discovery >= discovery_interval:
|
|
738
1092
|
await self.discover_modules()
|
|
1093
|
+
self._dirty = False
|
|
739
1094
|
last_discovery = now
|
|
740
1095
|
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
1096
|
+
# 按模块当前 resource_state 分组
|
|
1097
|
+
groups: dict[str, list[ModuleStatus]] = {NORMAL: [], WARNING: [], CRITICAL: []}
|
|
1098
|
+
for s in self.modules.values():
|
|
1099
|
+
groups.get(s.resource_state, groups[NORMAL]).append(s)
|
|
1100
|
+
|
|
1101
|
+
# 检查到期的组
|
|
1102
|
+
for level in (CRITICAL, WARNING, NORMAL):
|
|
1103
|
+
if now >= next_check[level] and groups[level]:
|
|
1104
|
+
tasks = []
|
|
1105
|
+
for s in groups[level]:
|
|
1106
|
+
tasks.append(self._check_one(s))
|
|
1107
|
+
tasks.append(self._check_resources(s))
|
|
1108
|
+
await asyncio.gather(*tasks, return_exceptions=True)
|
|
1109
|
+
# 更新该组的下次检查时间
|
|
1110
|
+
next_check[level] = now + self.INTERVALS[level]
|
|
1111
|
+
|
|
1112
|
+
# 没有模块的组也要推进时间戳,防止空组一直触发
|
|
1113
|
+
for level in (CRITICAL, WARNING, NORMAL):
|
|
1114
|
+
if not groups[level] and now >= next_check[level]:
|
|
1115
|
+
next_check[level] = now + self.INTERVALS[level]
|
|
1116
|
+
|
|
1117
|
+
# 超长降级检测
|
|
1118
|
+
await self._check_degraded_timeout()
|
|
1119
|
+
|
|
1120
|
+
def mark_dirty(self):
    """Flag the module list as stale.

    Called from outside the loop (for example on a module.offline event).
    The main loop re-runs module discovery the next time it wakes up and
    finds the flag set.
    """
    self._dirty = True
|
|
746
1123
|
|
|
747
1124
|
def stop(self):
|
|
748
1125
|
self._running = False
|
|
@@ -752,7 +1129,8 @@ class HealthMonitor:
|
|
|
752
1129
|
def get_status(self) -> dict:
|
|
753
1130
|
"""Return current health and resource status of all monitored modules."""
|
|
754
1131
|
return {
|
|
755
|
-
|
|
1132
|
+
"_monitor_enabled": self._monitor_enabled,
|
|
1133
|
+
**{mid: {
|
|
756
1134
|
"state": s.state,
|
|
757
1135
|
"fail_count": s.fail_count,
|
|
758
1136
|
"restarted_count": s.restarted_count,
|
|
@@ -764,4 +1142,4 @@ class HealthMonitor:
|
|
|
764
1142
|
"startup_time": s.startup_time,
|
|
765
1143
|
}
|
|
766
1144
|
for mid, s in self.modules.items()
|
|
767
|
-
}
|
|
1145
|
+
}}
|