@agentunion/kite 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__init__.py +1 -0
- package/__main__.py +15 -0
- package/cli.js +70 -0
- package/core/__init__.py +0 -0
- package/core/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/event_hub/BENCHMARK.md +94 -0
- package/core/event_hub/__init__.py +0 -0
- package/core/event_hub/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/event_hub/__pycache__/bench.cpython-313.pyc +0 -0
- package/core/event_hub/__pycache__/bench_perf.cpython-313.pyc +0 -0
- package/core/event_hub/__pycache__/dedup.cpython-313.pyc +0 -0
- package/core/event_hub/__pycache__/entry.cpython-313.pyc +0 -0
- package/core/event_hub/__pycache__/hub.cpython-313.pyc +0 -0
- package/core/event_hub/__pycache__/router.cpython-313.pyc +0 -0
- package/core/event_hub/__pycache__/server.cpython-313.pyc +0 -0
- package/core/event_hub/bench.py +459 -0
- package/core/event_hub/bench_extreme.py +308 -0
- package/core/event_hub/bench_perf.py +350 -0
- package/core/event_hub/bench_results/.gitkeep +0 -0
- package/core/event_hub/bench_results/2026-02-28_13-26-48.json +51 -0
- package/core/event_hub/bench_results/2026-02-28_13-44-45.json +51 -0
- package/core/event_hub/bench_results/2026-02-28_13-45-39.json +51 -0
- package/core/event_hub/dedup.py +31 -0
- package/core/event_hub/entry.py +113 -0
- package/core/event_hub/hub.py +263 -0
- package/core/event_hub/module.md +21 -0
- package/core/event_hub/router.py +21 -0
- package/core/event_hub/server.py +138 -0
- package/core/event_hub_bench/entry.py +371 -0
- package/core/event_hub_bench/module.md +25 -0
- package/core/launcher/__init__.py +0 -0
- package/core/launcher/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/launcher/__pycache__/entry.cpython-313.pyc +0 -0
- package/core/launcher/__pycache__/module_scanner.cpython-313.pyc +0 -0
- package/core/launcher/__pycache__/process_manager.cpython-313.pyc +0 -0
- package/core/launcher/data/log/lifecycle.jsonl +1045 -0
- package/core/launcher/data/processes_14752.json +32 -0
- package/core/launcher/data/token.txt +1 -0
- package/core/launcher/entry.py +965 -0
- package/core/launcher/module.md +37 -0
- package/core/launcher/module_scanner.py +253 -0
- package/core/launcher/process_manager.py +435 -0
- package/core/registry/__init__.py +0 -0
- package/core/registry/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/registry/__pycache__/entry.cpython-313.pyc +0 -0
- package/core/registry/__pycache__/server.cpython-313.pyc +0 -0
- package/core/registry/__pycache__/store.cpython-313.pyc +0 -0
- package/core/registry/data/port.txt +1 -0
- package/core/registry/data/port_14752.txt +1 -0
- package/core/registry/data/port_484.txt +1 -0
- package/core/registry/entry.py +73 -0
- package/core/registry/module.md +30 -0
- package/core/registry/server.py +256 -0
- package/core/registry/store.py +232 -0
- package/extensions/__init__.py +0 -0
- package/extensions/__pycache__/__init__.cpython-313.pyc +0 -0
- package/extensions/services/__init__.py +0 -0
- package/extensions/services/__pycache__/__init__.cpython-313.pyc +0 -0
- package/extensions/services/watchdog/__init__.py +0 -0
- package/extensions/services/watchdog/__pycache__/__init__.cpython-313.pyc +0 -0
- package/extensions/services/watchdog/__pycache__/entry.cpython-313.pyc +0 -0
- package/extensions/services/watchdog/__pycache__/monitor.cpython-313.pyc +0 -0
- package/extensions/services/watchdog/__pycache__/server.cpython-313.pyc +0 -0
- package/extensions/services/watchdog/entry.py +143 -0
- package/extensions/services/watchdog/module.md +25 -0
- package/extensions/services/watchdog/monitor.py +420 -0
- package/extensions/services/watchdog/server.py +167 -0
- package/main.py +17 -0
- package/package.json +27 -0
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Registry HTTP server.
|
|
3
|
+
7 endpoints: /modules, /lookup, /get/{path}, /tokens, /verify, /query, /health.
|
|
4
|
+
All endpoints except /health require Bearer token auth.
|
|
5
|
+
Connects to Event Hub to publish module lifecycle events.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import json
|
|
10
|
+
import uuid
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
import websockets
|
|
14
|
+
from fastapi import FastAPI, Request, HTTPException
|
|
15
|
+
from fastapi.responses import JSONResponse
|
|
16
|
+
|
|
17
|
+
from .store import RegistryStore
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RegistryServer:
    """FastAPI-based Registry HTTP server.

    Exposes the registry store over HTTP (``/modules``, ``/lookup``,
    ``/get/{path}``, ``/tokens``, ``/verify``, ``/query``, ``/health``) and
    forwards module lifecycle events to the Event Hub over a WebSocket once
    the Event Hub has registered itself.
    """

    def __init__(self, store: RegistryStore, launcher_token: str = ""):
        """Build the FastAPI app around ``store``.

        Args:
            store: Backing store used for tokens, module records and TTLs.
            launcher_token: Fallback token for the Event Hub connection when
                the store holds no token for the ``"registry"`` module.
        """
        self.store = store
        self.launcher_token = launcher_token
        self.app = self._create_app()
        self._ttl_task: asyncio.Task | None = None
        # Event Hub WebSocket
        self._event_hub_ws_url: str = ""
        self._ws: object | None = None
        self._ws_task: asyncio.Task | None = None

    def _extract_token(self, request: Request) -> str:
        """Return the Bearer token from the Authorization header, or ""."""
        auth = request.headers.get("Authorization", "")
        if auth.startswith("Bearer "):
            return auth[7:].strip()
        return ""

    def _require_auth(self, request: Request) -> str:
        """Verify the request token and return the caller's module_id.

        Raises:
            HTTPException: 401 when the token is missing or unknown.
        """
        token = self._extract_token(request)
        module_id = self.store.verify_token(token)
        if module_id is None:
            raise HTTPException(status_code=401, detail="Invalid or missing token")
        return module_id

    def _require_launcher(self, request: Request):
        """Verify the caller is Launcher.

        Raises:
            HTTPException: 403 when the token is not the launcher token.
        """
        token = self._extract_token(request)
        if not self.store.is_launcher(token):
            raise HTTPException(status_code=403, detail="Only Launcher may call this endpoint")

    @staticmethod
    async def _read_json_object(request: Request) -> dict:
        """Parse the request body and require it to be a JSON object.

        Every handler calls ``.get`` on the body, so anything that is not a
        JSON object is rejected here with a 400 instead of surfacing later as
        an unhandled exception.

        Raises:
            HTTPException: 400 when the body is not valid JSON or not an object.
        """
        try:
            body = await request.json()
        except ValueError as exc:  # JSONDecodeError / UnicodeDecodeError
            raise HTTPException(status_code=400, detail="Request body must be valid JSON") from exc
        if not isinstance(body, dict):
            raise HTTPException(status_code=400, detail="Request body must be a JSON object")
        return body

    # ── Event Hub connection ──

    async def _try_connect_event_hub(self):
        """Start the Event Hub connection task if the hub is registered.

        No-op when a socket is already open, when no ``event_hub`` module is
        registered, or when its metadata carries no ``ws_endpoint``.
        """
        if self._ws:
            return
        # Look up Event Hub ws_endpoint from our own store
        eh = self.store.modules.get("event_hub")
        if not eh:
            return
        ws_url = (eh.get("metadata") or {}).get("ws_endpoint", "")
        if not ws_url:
            return
        self._event_hub_ws_url = ws_url
        if not self._ws_task:
            self._ws_task = asyncio.create_task(self._ws_loop())

    async def _ws_loop(self):
        """Keep a session to the Event Hub open, reconnecting every 5 seconds."""
        while True:
            try:
                await self._ws_connect()
            except asyncio.CancelledError:
                return
            except Exception as e:
                print(f"[registry] Event Hub connection error: {e}")
            # The session ended (cleanly or not): drop the socket and back off.
            self._ws = None
            await asyncio.sleep(5)

    async def _ws_connect(self):
        """Run a single WebSocket session until the connection ends."""
        # Use registry's own per-module token (registered by Launcher via /tokens)
        # to avoid conflicting with Launcher's connection (same launcher_token → same module_id)
        token = self.store.token_map.get("registry", "") or self.launcher_token
        ws_url = f"{self._event_hub_ws_url}?token={token}"
        async with websockets.connect(ws_url) as ws:
            self._ws = ws
            print("[registry] Connected to Event Hub")
            try:
                async for raw in ws:
                    try:
                        msg = json.loads(raw)
                    except (json.JSONDecodeError, TypeError):
                        continue
                    if not isinstance(msg, dict):
                        # Only JSON objects carry a "type"; ignore anything else
                        # rather than letting it kill the session.
                        continue
                    msg_type = msg.get("type", "")
                    if msg_type == "error":
                        print(f"[registry] Event Hub error: {msg.get('message')}")
            finally:
                # Never leave a closed socket behind for _publish_event to use.
                self._ws = None

    async def _publish_event(self, event_type: str, data: dict):
        """Publish an event to the Event Hub.

        Best-effort: does nothing when not connected, and a failed send is
        logged but never raised to the caller.
        """
        if not self._ws:
            return
        from datetime import datetime, timezone
        msg = {
            "type": "event",
            "event_id": str(uuid.uuid4()),
            "event": event_type,
            "source": "registry",
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "data": data,
        }
        try:
            await self._ws.send(json.dumps(msg))
        except Exception as exc:
            print(f"[registry] Failed to publish '{event_type}': {exc}")

    # ── App factory ──

    def _create_app(self) -> FastAPI:
        """Create the FastAPI application and register all route handlers."""
        app = FastAPI(title="Kite Registry", docs_url=None, redoc_url=None)
        server = self

        @app.on_event("startup")
        async def _startup():
            server._ttl_task = asyncio.create_task(server._ttl_loop())

        @app.on_event("shutdown")
        async def _shutdown():
            if server._ttl_task:
                server._ttl_task.cancel()
            if server._ws_task:
                server._ws_task.cancel()
            if server._ws:
                await server._ws.close()

        # ── 1. POST /modules ──

        @app.post("/modules")
        async def modules(request: Request):
            caller = server._require_auth(request)
            body = await server._read_json_object(request)
            action = body.get("action", "")

            if action == "register":
                if "module_id" not in body:
                    raise HTTPException(400, "module_id required")
                # Only Launcher or the module itself may register
                if caller != "launcher" and caller != body["module_id"]:
                    raise HTTPException(403, f"Module '{caller}' cannot register as '{body['module_id']}'")
                result = server.store.register_module(body)
                if result.get("ok"):
                    mid = body["module_id"]
                    await server._publish_event("module.registered", {"module_id": mid})
                    # If Event Hub just registered, try connecting
                    if mid == "event_hub":
                        await server._try_connect_event_hub()
                return result

            elif action == "deregister":
                mid = body.get("module_id")
                if not mid:
                    raise HTTPException(400, "module_id required")
                if caller != "launcher" and caller != mid:
                    raise HTTPException(403, f"Module '{caller}' cannot deregister '{mid}'")
                result = server.store.deregister_module(mid)
                if result.get("ok"):
                    await server._publish_event("module.unregistered", {"module_id": mid})
                return result

            elif action == "heartbeat":
                mid = body.get("module_id")
                if not mid:
                    raise HTTPException(400, "module_id required")
                if caller != "launcher" and caller != mid:
                    raise HTTPException(403, f"Module '{caller}' cannot heartbeat for '{mid}'")
                result = server.store.heartbeat(mid)
                if result.get("ok"):
                    await server._publish_event("module.heartbeat", {"module_id": mid})
                return result

            else:
                raise HTTPException(400, f"Unknown action: {action}")

        # ── 2. GET /lookup ──

        @app.get("/lookup")
        async def lookup(
            request: Request,
            field: str | None = None,
            module: str | None = None,
            value: str | None = None,
        ):
            server._require_auth(request)
            return server.store.lookup(field=field, module=module, value=value)

        # ── 3. GET /get/{path} ──

        @app.get("/get/{path:path}")
        async def get_by_path(request: Request, path: str):
            server._require_auth(request)
            val, found = server.store.get_by_path(path)
            if not found:
                raise HTTPException(404, f"Path not found: {path}")
            return val

        # ── 4. POST /tokens ──

        @app.post("/tokens")
        async def register_tokens(request: Request):
            server._require_launcher(request)
            body = await server._read_json_object(request)
            # The store expects a module_id -> token string mapping.
            if not all(isinstance(tok, str) for tok in body.values()):
                raise HTTPException(400, "tokens must be strings")
            server.store.register_tokens(body)
            return {"ok": True}

        # ── 5. POST /verify ──

        @app.post("/verify")
        async def verify_token(request: Request):
            server._require_auth(request)
            body = await server._read_json_object(request)
            target_token = body.get("token", "")
            module_id = server.store.verify_token(target_token)
            if module_id:
                return {"ok": True, "module_id": module_id}
            return {"ok": False}

        # ── 6. POST /query (stub) ──
        # TODO: implement LLM semantic query per design §5.1
        # accept {"question": "..."}, search registry with LLM, return matched modules/tools

        @app.post("/query")
        async def query(request: Request):
            server._require_auth(request)
            body = await server._read_json_object(request)
            question = body.get("question", "")
            return {"ok": False, "error": "LLM query not implemented yet", "question": question}

        # ── 7. GET /health ──

        @app.get("/health")
        async def health():
            return {
                "status": "healthy",
                "module_count": len(server.store.modules),
                "online_count": sum(
                    1 for m in server.store.modules.values()
                    if m.get("status") == "online"
                ),
            }

        return app

    async def _ttl_loop(self):
        """Every 10 seconds, expire stale heartbeats and publish offline events."""
        while True:
            await asyncio.sleep(10)
            expired = self.store.check_ttl()
            for mid in expired:
                await self._publish_event("module.offline", {"module_id": mid})
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Registry in-memory store.
|
|
3
|
+
Manages module records, token verification, heartbeat TTL, lookup and get-by-path.
|
|
4
|
+
No persistence — Registry crash triggers Kite full restart, all modules re-register.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import fnmatch
|
|
8
|
+
import time
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RegistryStore:
|
|
13
|
+
|
|
14
|
+
def __init__(self, launcher_token: str):
|
|
15
|
+
self.launcher_token = launcher_token
|
|
16
|
+
self.token_map: dict[str, str] = {} # module_id -> token
|
|
17
|
+
self.modules: dict[str, dict] = {} # module_id -> registration payload
|
|
18
|
+
self.heartbeats: dict[str, float] = {} # module_id -> last heartbeat timestamp
|
|
19
|
+
self.ttl = 60 # seconds before marking offline
|
|
20
|
+
self.heartbeat_interval = 30
|
|
21
|
+
|
|
22
|
+
# ── Token verification ──
|
|
23
|
+
|
|
24
|
+
def verify_token(self, token: str) -> str | None:
|
|
25
|
+
"""Return module_id if token is valid, None otherwise."""
|
|
26
|
+
if token == self.launcher_token:
|
|
27
|
+
return "launcher"
|
|
28
|
+
for mid, tok in self.token_map.items():
|
|
29
|
+
if token == tok:
|
|
30
|
+
return mid
|
|
31
|
+
return None
|
|
32
|
+
|
|
33
|
+
def is_launcher(self, token: str) -> bool:
|
|
34
|
+
return token == self.launcher_token
|
|
35
|
+
|
|
36
|
+
def register_tokens(self, mapping: dict[str, str]):
|
|
37
|
+
"""Register module_id -> token mapping. Only Launcher may call this."""
|
|
38
|
+
self.token_map.update(mapping)
|
|
39
|
+
|
|
40
|
+
# ── Module lifecycle ──
|
|
41
|
+
|
|
42
|
+
_REQUIRED_FIELDS = ("module_id", "module_type", "api_endpoint")
|
|
43
|
+
|
|
44
|
+
def register_module(self, data: dict) -> dict:
|
|
45
|
+
"""Register or update a module. Idempotent — same module_id overwrites."""
|
|
46
|
+
# Validate required fields
|
|
47
|
+
missing = [f for f in self._REQUIRED_FIELDS if not data.get(f)]
|
|
48
|
+
if missing:
|
|
49
|
+
return {"ok": False, "error": f"Missing required fields: {', '.join(missing)}"}
|
|
50
|
+
|
|
51
|
+
mid = data["module_id"]
|
|
52
|
+
|
|
53
|
+
# Warn on tool name conflicts
|
|
54
|
+
new_tools = data.get("tools", {})
|
|
55
|
+
if isinstance(new_tools, dict):
|
|
56
|
+
for tool_name in new_tools:
|
|
57
|
+
for other_mid, other_data in self.modules.items():
|
|
58
|
+
if other_mid == mid:
|
|
59
|
+
continue
|
|
60
|
+
if tool_name in other_data.get("tools", {}):
|
|
61
|
+
print(f"[registry] WARNING: tool '{tool_name}' registered by both '{other_mid}' and '{mid}'")
|
|
62
|
+
|
|
63
|
+
# Strip action field — it's a request verb, not part of the registration payload
|
|
64
|
+
record = {k: v for k, v in data.items() if k != "action"}
|
|
65
|
+
record["status"] = "online"
|
|
66
|
+
record["registered_at"] = time.time()
|
|
67
|
+
self.modules[mid] = record
|
|
68
|
+
self.heartbeats[mid] = time.time()
|
|
69
|
+
return {"ok": True, "ttl": self.ttl, "heartbeat_interval": self.heartbeat_interval}
|
|
70
|
+
|
|
71
|
+
def deregister_module(self, module_id: str) -> dict:
|
|
72
|
+
"""Remove a module record immediately."""
|
|
73
|
+
self.modules.pop(module_id, None)
|
|
74
|
+
self.heartbeats.pop(module_id, None)
|
|
75
|
+
return {"ok": True}
|
|
76
|
+
|
|
77
|
+
def heartbeat(self, module_id: str) -> dict:
|
|
78
|
+
"""Renew heartbeat for a module."""
|
|
79
|
+
if module_id not in self.modules:
|
|
80
|
+
return {"ok": False, "error": "module not registered"}
|
|
81
|
+
self.heartbeats[module_id] = time.time()
|
|
82
|
+
self.modules[module_id]["status"] = "online"
|
|
83
|
+
return {"ok": True}
|
|
84
|
+
|
|
85
|
+
def check_ttl(self) -> list[str]:
|
|
86
|
+
"""Mark modules as offline if heartbeat expired. Returns list of newly-offline module_ids."""
|
|
87
|
+
now = time.time()
|
|
88
|
+
expired = []
|
|
89
|
+
for mid, last in list(self.heartbeats.items()):
|
|
90
|
+
if mid in self.modules and now - last > self.ttl:
|
|
91
|
+
if self.modules[mid].get("status") != "offline":
|
|
92
|
+
self.modules[mid]["status"] = "offline"
|
|
93
|
+
expired.append(mid)
|
|
94
|
+
return expired
|
|
95
|
+
|
|
96
|
+
# ── Get by dot-path ──
|
|
97
|
+
|
|
98
|
+
def get_by_path(self, path: str) -> Any | None:
|
|
99
|
+
"""
|
|
100
|
+
Resolve a dot-path like 'kernel.tools.read_file.endpoint'.
|
|
101
|
+
First segment is module_id, rest navigates into the registration dict.
|
|
102
|
+
Handles dotted keys (e.g. 'session.ended') by greedy matching.
|
|
103
|
+
Returns (value, True) on hit, (None, False) on miss.
|
|
104
|
+
"""
|
|
105
|
+
parts = path.split(".")
|
|
106
|
+
if not parts:
|
|
107
|
+
return None, False
|
|
108
|
+
|
|
109
|
+
# First segment: module_id
|
|
110
|
+
module_id = parts[0]
|
|
111
|
+
data = self.modules.get(module_id)
|
|
112
|
+
if data is None:
|
|
113
|
+
return None, False
|
|
114
|
+
if len(parts) == 1:
|
|
115
|
+
return data, True
|
|
116
|
+
|
|
117
|
+
return self._resolve_path(data, parts[1:])
|
|
118
|
+
|
|
119
|
+
@staticmethod
|
|
120
|
+
def _resolve_path(obj: Any, parts: list[str]) -> tuple[Any, bool]:
|
|
121
|
+
"""
|
|
122
|
+
Walk into obj using parts. Greedy: try joining multiple parts
|
|
123
|
+
as a single dotted key before falling back to single-segment traversal.
|
|
124
|
+
"""
|
|
125
|
+
if not parts:
|
|
126
|
+
return obj, True
|
|
127
|
+
if not isinstance(obj, dict):
|
|
128
|
+
return None, False
|
|
129
|
+
|
|
130
|
+
# Try longest key first (greedy match for dotted keys like 'session.ended')
|
|
131
|
+
for i in range(len(parts), 0, -1):
|
|
132
|
+
candidate = ".".join(parts[:i])
|
|
133
|
+
if candidate in obj:
|
|
134
|
+
return RegistryStore._resolve_path(obj[candidate], parts[i:])
|
|
135
|
+
|
|
136
|
+
return None, False
|
|
137
|
+
|
|
138
|
+
# ── Lookup (glob search) ──
|
|
139
|
+
|
|
140
|
+
def lookup(self, field: str = None, module: str = None, value: str = None) -> list[dict]:
|
|
141
|
+
"""
|
|
142
|
+
Search across all online modules. All three params support glob patterns.
|
|
143
|
+
Returns list of {field, module, api_endpoint, value}.
|
|
144
|
+
"""
|
|
145
|
+
results = []
|
|
146
|
+
for mid, data in self.modules.items():
|
|
147
|
+
if data.get("status") != "online":
|
|
148
|
+
continue
|
|
149
|
+
if module and not fnmatch.fnmatch(mid, module):
|
|
150
|
+
continue
|
|
151
|
+
|
|
152
|
+
api_ep = data.get("api_endpoint", "")
|
|
153
|
+
|
|
154
|
+
if field:
|
|
155
|
+
matches = self._match_fields(data, field)
|
|
156
|
+
for fpath, fval in matches:
|
|
157
|
+
if value and not self._value_matches(fval, value):
|
|
158
|
+
continue
|
|
159
|
+
results.append({
|
|
160
|
+
"field": fpath,
|
|
161
|
+
"module": mid,
|
|
162
|
+
"api_endpoint": api_ep,
|
|
163
|
+
"value": fval,
|
|
164
|
+
})
|
|
165
|
+
elif value:
|
|
166
|
+
for k, v in data.items():
|
|
167
|
+
if self._value_matches(v, value):
|
|
168
|
+
results.append({
|
|
169
|
+
"field": k,
|
|
170
|
+
"module": mid,
|
|
171
|
+
"api_endpoint": api_ep,
|
|
172
|
+
"value": v,
|
|
173
|
+
})
|
|
174
|
+
else:
|
|
175
|
+
results.append({
|
|
176
|
+
"field": "module_id",
|
|
177
|
+
"module": mid,
|
|
178
|
+
"api_endpoint": api_ep,
|
|
179
|
+
"value": mid,
|
|
180
|
+
})
|
|
181
|
+
|
|
182
|
+
return results
|
|
183
|
+
|
|
184
|
+
# ── Lookup helpers ──
|
|
185
|
+
|
|
186
|
+
@staticmethod
|
|
187
|
+
def _match_fields(data: dict, pattern: str) -> list[tuple[str, Any]]:
|
|
188
|
+
"""
|
|
189
|
+
Walk a module's registration dict and find all field paths matching a glob pattern.
|
|
190
|
+
Pattern like 'tools.*' matches top-level keys under 'tools'.
|
|
191
|
+
Pattern like 'tools.*file*' matches tool names containing 'file'.
|
|
192
|
+
Pattern like 'events_publish.session.ended' matches dotted keys via greedy join.
|
|
193
|
+
Returns list of (field_path, value).
|
|
194
|
+
"""
|
|
195
|
+
parts = pattern.split(".")
|
|
196
|
+
return RegistryStore._walk_fields(data, parts, "")
|
|
197
|
+
|
|
198
|
+
@staticmethod
|
|
199
|
+
def _walk_fields(obj: Any, parts: list[str], prefix: str) -> list[tuple[str, Any]]:
|
|
200
|
+
"""Recursively walk dict, matching glob pattern segments against keys."""
|
|
201
|
+
if not parts:
|
|
202
|
+
return [(prefix, obj)] if prefix else []
|
|
203
|
+
if not isinstance(obj, dict):
|
|
204
|
+
return []
|
|
205
|
+
|
|
206
|
+
results = []
|
|
207
|
+
# Try greedy: join multiple pattern segments to match dotted keys
|
|
208
|
+
for i in range(len(parts), 0, -1):
|
|
209
|
+
candidate_pattern = ".".join(parts[:i])
|
|
210
|
+
remaining = parts[i:]
|
|
211
|
+
|
|
212
|
+
for key in obj:
|
|
213
|
+
if fnmatch.fnmatch(key, candidate_pattern):
|
|
214
|
+
new_prefix = f"{prefix}.{key}" if prefix else key
|
|
215
|
+
if remaining:
|
|
216
|
+
results.extend(
|
|
217
|
+
RegistryStore._walk_fields(obj[key], remaining, new_prefix)
|
|
218
|
+
)
|
|
219
|
+
else:
|
|
220
|
+
results.append((new_prefix, obj[key]))
|
|
221
|
+
|
|
222
|
+
if results:
|
|
223
|
+
break # greedy: longest match wins
|
|
224
|
+
|
|
225
|
+
return results
|
|
226
|
+
|
|
227
|
+
@staticmethod
|
|
228
|
+
def _value_matches(val: Any, pattern: str) -> bool:
|
|
229
|
+
"""Check if a value matches a glob pattern. Dicts/lists never match."""
|
|
230
|
+
if isinstance(val, (dict, list)):
|
|
231
|
+
return False
|
|
232
|
+
return fnmatch.fnmatch(str(val), pattern)
|
|
File without changes
|
|
Binary file
|
|
File without changes
|
|
Binary file
|
|
File without changes
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Watchdog entry point.
|
|
3
|
+
Reads boot_info from stdin, registers to Registry, starts health monitor.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import socket
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
import httpx
|
|
12
|
+
import uvicorn
|
|
13
|
+
|
|
14
|
+
# Put the project root (three directories above this file's directory) on
# sys.path so the absolute `extensions.*` imports below resolve.
_this_dir = os.path.dirname(os.path.abspath(__file__))
_project_root = os.path.abspath(
    os.path.join(_this_dir, os.pardir, os.pardir, os.pardir)
)
if sys.path.count(_project_root) == 0:
    sys.path.insert(0, _project_root)
|
|
19
|
+
|
|
20
|
+
from extensions.services.watchdog.monitor import HealthMonitor
|
|
21
|
+
from extensions.services.watchdog.server import WatchdogServer
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _get_free_port() -> int:
|
|
25
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
|
26
|
+
s.bind(("127.0.0.1", 0))
|
|
27
|
+
return s.getsockname()[1]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _register_to_registry(token: str, registry_url: str, port: int):
    """Register the watchdog module with the Registry.

    Posts a ``register`` action to ``{registry_url}/modules`` and prints the
    outcome. A 200 response whose JSON body carries ``"ok": false`` is
    reported as a rejection rather than as success.

    Args:
        token: Bearer token sent in the Authorization header.
        registry_url: Base URL of the Registry.
        port: Local port advertised in ``api_endpoint``.

    Raises:
        Transport errors raised by ``httpx.post`` are not caught here.
    """
    payload = {
        "action": "register",
        "module_id": "watchdog",
        "module_type": "service",
        "name": "Watchdog",
        "api_endpoint": f"http://127.0.0.1:{port}",
        "health_endpoint": "/health",
        "events_publish": {
            "watchdog.module.unhealthy": {"description": "Module failed health check"},
            "watchdog.module.recovered": {"description": "Module recovered from unhealthy"},
            "watchdog.alert": {"description": "Module restarted too many times"},
        },
        "events_subscribe": [
            "module.started",
            "module.stopped",
        ],
    }
    headers = {"Authorization": f"Bearer {token}"}
    resp = httpx.post(
        f"{registry_url}/modules",
        json=payload, headers=headers, timeout=5,
    )
    if resp.status_code != 200:
        print(f"[watchdog] WARNING: Registry returned {resp.status_code}")
        return

    # The status code alone is not enough: inspect the body for an explicit
    # refusal before claiming success.
    try:
        result = resp.json()
    except ValueError:
        result = None
    if isinstance(result, dict) and result.get("ok") is False:
        print(f"[watchdog] WARNING: Registry rejected registration: {result.get('error')}")
        return
    print("[watchdog] Registered to Registry")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _get_launcher_url(token: str, registry_url: str) -> str:
    """Discover the Launcher API endpoint from the Registry.

    Args:
        token: Bearer token sent in the Authorization header.
        registry_url: Base URL of the Registry.

    Returns:
        The endpoint string, or "" when the lookup fails, returns a non-200
        status, or yields something other than a string.
    """
    headers = {"Authorization": f"Bearer {token}"}
    try:
        resp = httpx.get(
            f"{registry_url}/get/launcher.api_endpoint",
            headers=headers, timeout=5,
        )
        if resp.status_code == 200:
            endpoint = resp.json()
            # Honour the declared return type: only a string is usable as a URL.
            if isinstance(endpoint, str):
                return endpoint
    except Exception as exc:
        # Best-effort lookup: report the reason, then fall back to "".
        print(f"[watchdog] WARNING: Launcher URL lookup failed: {exc}")
    return ""
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _get_event_hub_ws(token: str, registry_url: str) -> str:
    """Discover the Event Hub WebSocket endpoint from the Registry, with retry.

    Polls the Registry about once per second for roughly 10 seconds.

    Args:
        token: Bearer token sent in the Authorization header.
        registry_url: Base URL of the Registry.

    Returns:
        The endpoint string, or "" when no non-empty string value was
        obtained before the deadline.
    """
    import time
    headers = {"Authorization": f"Bearer {token}"}
    # Monotonic clock: the retry window must not stretch or collapse when the
    # system time is adjusted.
    deadline = time.monotonic() + 10
    while time.monotonic() < deadline:
        try:
            resp = httpx.get(
                f"{registry_url}/get/event_hub.metadata.ws_endpoint",
                headers=headers, timeout=3,
            )
            if resp.status_code == 200:
                val = resp.json()
                if isinstance(val, str) and val:
                    return val
        except Exception:
            # Failures are retried until the deadline passes.
            pass
        time.sleep(1)
    return ""
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def main():
    """Start the watchdog service.

    Reads one JSON line of boot_info from stdin (``token`` and
    ``registry_port``), registers with the Registry, discovers the Launcher
    and Event Hub endpoints, then serves the watchdog app on a free loopback
    port. Exits with status 1 when boot_info lacks a token or registry port.
    """
    # Read boot_info from stdin
    token = ""
    registry_port = 0
    try:
        line = sys.stdin.readline().strip()
        if line:
            boot_info = json.loads(line)
            token = boot_info.get("token", "")
            registry_port = boot_info.get("registry_port", 0)
    except Exception as exc:
        # Say why parsing failed; the check below then reports what is missing.
        print(f"[watchdog] WARNING: Could not parse boot_info: {exc}")

    if not token or not registry_port:
        print("[watchdog] ERROR: Missing token or registry_port in boot_info")
        sys.exit(1)

    print(f"[watchdog] Token received ({len(token)} chars), registry port: {registry_port}")

    registry_url = f"http://127.0.0.1:{registry_port}"
    port = _get_free_port()

    # Register to Registry
    _register_to_registry(token, registry_url, port)

    # Discover Launcher URL
    launcher_url = _get_launcher_url(token, registry_url)
    if not launcher_url:
        print("[watchdog] WARNING: Could not discover Launcher URL, restart disabled")

    # Discover Event Hub WebSocket URL
    event_hub_ws = _get_event_hub_ws(token, registry_url)
    if not event_hub_ws:
        print("[watchdog] WARNING: Could not discover Event Hub WS, events disabled")

    # Create monitor and server
    monitor = HealthMonitor(
        own_token=token,
        registry_url=registry_url,
        launcher_url=launcher_url,
    )
    server = WatchdogServer(monitor, token=token, event_hub_ws=event_hub_ws)

    print(f"[watchdog] Starting on port {port}")
    uvicorn.run(server.app, host="127.0.0.1", port=port, log_level="warning")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: watchdog
|
|
3
|
+
display_name: Watchdog
|
|
4
|
+
version: "1.0"
|
|
5
|
+
type: service
|
|
6
|
+
state: enabled
|
|
7
|
+
runtime: python
|
|
8
|
+
entry: entry.py
|
|
9
|
+
events:
|
|
10
|
+
- watchdog.module.unhealthy
|
|
11
|
+
- watchdog.module.recovered
|
|
12
|
+
- watchdog.alert
|
|
13
|
+
subscriptions:
|
|
14
|
+
- module.started
|
|
15
|
+
- module.stopped
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
# Watchdog(保活模块)
|
|
19
|
+
|
|
20
|
+
应用层健康监控扩展模块。Launcher 处理进程级故障,Watchdog 处理应用级故障(进程活着但不健康)。
|
|
21
|
+
|
|
22
|
+
- 心跳检测 — 定期检查各模块的 `/health` 端点
|
|
23
|
+
- 资源监控 — 检测模块健康状态中的异常指标
|
|
24
|
+
- 非 core 模块重启 — 发现异常时通过 Launcher API 重启模块
|
|
25
|
+
- 告警通知 — 模块持续异常时,通过 Event Hub 发布告警事件
|