mcp-coordinator 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +846 -846
- package/dashboard/Dockerfile +19 -19
- package/dashboard/public/index.html +1178 -1178
- package/dist/cli/dashboard.js +9 -5
- package/dist/cli/server/backup.d.ts +7 -0
- package/dist/cli/server/backup.js +162 -0
- package/dist/cli/server/index.js +5 -0
- package/dist/cli/server/restore.d.ts +2 -0
- package/dist/cli/server/restore.js +117 -0
- package/dist/cli/server/start.js +24 -1
- package/dist/cli/server/status.js +16 -23
- package/dist/src/agent-activity.js +6 -6
- package/dist/src/agent-registry.js +6 -6
- package/dist/src/announce-workflow.d.ts +52 -0
- package/dist/src/announce-workflow.js +91 -0
- package/dist/src/consultation.d.ts +22 -0
- package/dist/src/consultation.js +118 -45
- package/dist/src/database.js +126 -126
- package/dist/src/db-adapter.d.ts +30 -0
- package/dist/src/db-adapter.js +32 -1
- package/dist/src/dependency-map.js +5 -5
- package/dist/src/file-tracker.d.ts +10 -0
- package/dist/src/file-tracker.js +40 -8
- package/dist/src/http/handle-health.d.ts +23 -0
- package/dist/src/http/handle-health.js +86 -0
- package/dist/src/http/handle-rest.d.ts +23 -0
- package/dist/src/http/handle-rest.js +374 -0
- package/dist/src/http/utils.d.ts +15 -0
- package/dist/src/http/utils.js +39 -0
- package/dist/src/impact-scorer.js +87 -50
- package/dist/src/introspection.js +1 -1
- package/dist/src/metrics.d.ts +83 -0
- package/dist/src/metrics.js +162 -0
- package/dist/src/mqtt-bridge.d.ts +21 -0
- package/dist/src/mqtt-bridge.js +55 -5
- package/dist/src/mqtt-broker.d.ts +16 -0
- package/dist/src/mqtt-broker.js +16 -1
- package/dist/src/path-guard.d.ts +14 -0
- package/dist/src/path-guard.js +44 -0
- package/dist/src/reset-guard.d.ts +16 -0
- package/dist/src/reset-guard.js +24 -0
- package/dist/src/serve-http.d.ts +31 -1
- package/dist/src/serve-http.js +189 -446
- package/dist/src/server-setup.d.ts +2 -0
- package/dist/src/server-setup.js +25 -366
- package/dist/src/sse-emitter.d.ts +6 -0
- package/dist/src/sse-emitter.js +50 -2
- package/dist/src/tools/agents-tools.d.ts +8 -0
- package/dist/src/tools/agents-tools.js +46 -0
- package/dist/src/tools/consultation-tools.d.ts +21 -0
- package/dist/src/tools/consultation-tools.js +170 -0
- package/dist/src/tools/dependencies-tools.d.ts +8 -0
- package/dist/src/tools/dependencies-tools.js +27 -0
- package/dist/src/tools/files-tools.d.ts +8 -0
- package/dist/src/tools/files-tools.js +28 -0
- package/dist/src/tools/mqtt-tools.d.ts +9 -0
- package/dist/src/tools/mqtt-tools.js +33 -0
- package/dist/src/tools/status-tools.d.ts +8 -0
- package/dist/src/tools/status-tools.js +63 -0
- package/package.json +83 -80
package/dist/src/serve-http.js
CHANGED
|
@@ -14,8 +14,10 @@ const __dirname = path.dirname(__filename);
|
|
|
14
14
|
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
15
15
|
import { createServices, createMcpServer } from "./server-setup.js";
|
|
16
16
|
import { createLogger } from "./logger.js";
|
|
17
|
-
import { initAuth, authenticateRequest, createToken, refreshToken, revokeAgent, setAuthLogger } from "./auth.js";
|
|
18
|
-
import {
|
|
17
|
+
import { initAuth, authenticateRequest, createToken, refreshToken, revokeAgent, setAuthLogger, verifyToken } from "./auth.js";
|
|
18
|
+
import { safeJoinUnderRoot } from "./path-guard.js";
|
|
19
|
+
import { handleRest as handleRestExt } from "./http/handle-rest.js";
|
|
20
|
+
import { parseBody as parseBodyShared, json as jsonShared } from "./http/utils.js";
|
|
19
21
|
import { getVersion } from "../cli/version.js";
|
|
20
22
|
const VERSION = getVersion();
|
|
21
23
|
import { startEmbeddedMqttBroker } from "./mqtt-broker.js";
|
|
@@ -48,439 +50,36 @@ let httpLog;
|
|
|
48
50
|
let mcpLog;
|
|
49
51
|
let authLog;
|
|
50
52
|
let currentRunConfig = null;
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
try {
|
|
57
|
-
resolve(body ? JSON.parse(body) : {});
|
|
58
|
-
}
|
|
59
|
-
catch {
|
|
60
|
-
reject(new Error("Invalid JSON"));
|
|
61
|
-
}
|
|
62
|
-
});
|
|
63
|
-
req.on("error", reject);
|
|
64
|
-
});
|
|
65
|
-
}
|
|
66
|
-
function json(res, data, status = 200) {
|
|
67
|
-
res.writeHead(status, { "Content-Type": "application/json", "Access-Control-Allow-Origin": "*" });
|
|
68
|
-
res.end(JSON.stringify(data));
|
|
69
|
-
}
|
|
53
|
+
// S1: parseBody and json moved to ./http/utils.js (shared with handle-rest.ts).
|
|
54
|
+
// Re-bound to local names so the rest of this file (handleAuth, handleSse,
|
|
55
|
+
// startServer) can keep using `parseBody` / `json` without changes.
|
|
56
|
+
const parseBody = parseBodyShared;
|
|
57
|
+
const json = jsonShared;
|
|
70
58
|
function decodeJwtPayload(token) {
|
|
59
|
+
// Used only on tokens we just minted ourselves (to read the `exp` claim
|
|
60
|
+
// before returning it to the client). Real verification of inbound tokens
|
|
61
|
+
// happens in `authenticateRequest` via jose.jwtVerify().
|
|
71
62
|
const base64url = token.split(".")[1];
|
|
72
|
-
|
|
73
|
-
return JSON.parse(atob(base64));
|
|
63
|
+
return JSON.parse(Buffer.from(base64url, "base64url").toString("utf-8"));
|
|
74
64
|
}
|
|
75
65
|
function safeEqual(a, b) {
|
|
76
66
|
if (a.length !== b.length)
|
|
77
67
|
return false;
|
|
78
68
|
return timingSafeEqual(Buffer.from(a), Buffer.from(b));
|
|
79
69
|
}
|
|
70
|
+
// S1: handleRest extracted to ./http/handle-rest.ts. Thin wrapper here keeps
|
|
71
|
+
// startServer's call site stable while the 382-line REST router lives in its
|
|
72
|
+
// own module. currentRunConfig stays here as the single mutable owner; the
|
|
73
|
+
// extracted function reads/writes via getRunConfig/setRunConfig accessors.
|
|
80
74
|
async function handleRest(req, res) {
|
|
81
|
-
const
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
if (isPoll) {
|
|
90
|
-
httpLog.debug({ method: req.method, url, agent_id: agentId }, "REST request");
|
|
91
|
-
}
|
|
92
|
-
else {
|
|
93
|
-
httpLog.info({ method: req.method, url, agent_id: agentId }, "REST request");
|
|
94
|
-
}
|
|
95
|
-
const { registry, activityTracker, consultation, fileTracker, impactScorer, introspection, sseEmitter, mqttBridge, quotaCache } = services;
|
|
96
|
-
if (url === "/api/register") {
|
|
97
|
-
const { agent_id, name, modules } = body;
|
|
98
|
-
const agent = registry.register(agent_id, name, modules || []);
|
|
99
|
-
sseEmitter.emit("agent_online", { agent_id, name, modules });
|
|
100
|
-
json(res, agent);
|
|
101
|
-
}
|
|
102
|
-
else if (url === "/api/session-start") {
|
|
103
|
-
const { agent_id, agent_name } = body;
|
|
104
|
-
const online = registry.listOnline();
|
|
105
|
-
const openThreads = consultation.listThreads({ status: "open" });
|
|
106
|
-
const hotFiles = fileTracker.getHotFiles(30);
|
|
107
|
-
const briefing = [
|
|
108
|
-
`Agents en ligne: ${online.map((a) => a.name).join(", ") || "aucun"}`,
|
|
109
|
-
`Consultations ouvertes: ${openThreads.length}`,
|
|
110
|
-
`Hot files: ${hotFiles.map((f) => f.file_path).join(", ") || "aucun"}`,
|
|
111
|
-
].join("\n");
|
|
112
|
-
json(res, { briefing, summary: { online: online.length, open_threads: openThreads.length, hot_files: hotFiles.length } });
|
|
113
|
-
}
|
|
114
|
-
else if (url === "/api/session-stop") {
|
|
115
|
-
const { agent_id } = body;
|
|
116
|
-
registry.setOffline(agent_id);
|
|
117
|
-
activityTracker.reportOffline(agent_id);
|
|
118
|
-
consultation.handleAgentDeparture(agent_id);
|
|
119
|
-
sseEmitter.emit("agent_offline", { agent_id });
|
|
120
|
-
json(res, { ok: true });
|
|
121
|
-
}
|
|
122
|
-
else if (url === "/api/check-conflict") {
|
|
123
|
-
const { file, agent_id } = body;
|
|
124
|
-
const conflict = fileTracker.checkFileConflict(file, agent_id, 30);
|
|
125
|
-
const warnings = [];
|
|
126
|
-
if (conflict.conflict) {
|
|
127
|
-
warnings.push(`File ${file} recently edited by: ${conflict.agents.join(", ")}`);
|
|
128
|
-
}
|
|
129
|
-
json(res, { conflict: conflict.conflict, warnings });
|
|
130
|
-
}
|
|
131
|
-
else if (url === "/api/log-file") {
|
|
132
|
-
const { session_id, agent_id, agent_name, tool_name, file } = body;
|
|
133
|
-
fileTracker.log({ session_id, agent_id, agent_name, tool_name, file_path: file });
|
|
134
|
-
activityTracker.reportFileActivity(agent_id, file);
|
|
135
|
-
sseEmitter.emit("file_edited", { agent_id, agent_name: agent_name || agent_id, file, tool_name });
|
|
136
|
-
json(res, { ok: true });
|
|
137
|
-
}
|
|
138
|
-
else if (url === "/api/announce") {
|
|
139
|
-
const { agent_id, subject, plan, target_modules, target_files, depends_on_files, exports_affected, keep_open, assigned_to } = body;
|
|
140
|
-
// Quality gate on plan
|
|
141
|
-
const planQuality = assessPlanQuality(plan);
|
|
142
|
-
const effectiveMode = planQuality.mode;
|
|
143
|
-
const thread = consultation.announceWork({ agent_id, subject, plan, target_modules, target_files, depends_on_files, exports_affected, keep_open, assigned_to });
|
|
144
|
-
const agentInfo = registry.get(agent_id);
|
|
145
|
-
// Impact scoring: categorize all online agents
|
|
146
|
-
const categorized = impactScorer.categorize({
|
|
147
|
-
agent_id, target_modules, target_files, depends_on_files, exports_affected,
|
|
148
|
-
});
|
|
149
|
-
// Override expected_respondents with concerned agents from scorer
|
|
150
|
-
{
|
|
151
|
-
const db = (await import("./database.js")).getDb();
|
|
152
|
-
const concernedIds = categorized.concerned.map(s => s.agent_id);
|
|
153
|
-
db.prepare("UPDATE threads SET expected_respondents = ? WHERE id = ?")
|
|
154
|
-
.run(JSON.stringify(concernedIds), thread.id);
|
|
155
|
-
// Only auto-resolve when truly alone — no other online agents.
|
|
156
|
-
// If peers are online but not yet concerned (e.g. they haven't announced
|
|
157
|
-
// yet), keep the thread open so a subsequent announce can still match
|
|
158
|
-
// this work via Layer 0. Thread will timeout naturally if no one joins.
|
|
159
|
-
const otherOnlineCount = registry.listOnline().filter((a) => a.id !== agent_id).length;
|
|
160
|
-
const shouldAutoResolve = concernedIds.length === 0 && otherOnlineCount === 0;
|
|
161
|
-
if (shouldAutoResolve && thread.status === "open" && !keep_open) {
|
|
162
|
-
db.prepare("UPDATE threads SET status = 'resolved', resolved_at = ? WHERE id = ?")
|
|
163
|
-
.run(new Date().toISOString(), thread.id);
|
|
164
|
-
consultation.emitResolution(thread.id, "auto_resolved");
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
// Emit impact_scored SSE events for all agents
|
|
168
|
-
for (const s of [...categorized.concerned, ...categorized.gray_zone, ...categorized.pass]) {
|
|
169
|
-
sseEmitter.emit("impact_scored", {
|
|
170
|
-
thread_id: thread.id, agent_id: s.agent_id, agent_name: s.agent_name,
|
|
171
|
-
score: s.score, reasons: s.reasons, category: s.score >= 90 ? "concerned" : s.score >= 30 ? "gray_zone" : "pass",
|
|
172
|
-
});
|
|
173
|
-
}
|
|
174
|
-
// Create introspection records and emit introspection_requested for gray_zone agents
|
|
175
|
-
for (const s of categorized.gray_zone) {
|
|
176
|
-
introspection.create({ thread_id: thread.id, agent_id: s.agent_id, score: s.score, reasons: s.reasons });
|
|
177
|
-
sseEmitter.emit("introspection_requested", {
|
|
178
|
-
thread_id: thread.id, agent_id: s.agent_id, agent_name: s.agent_name, score: s.score, reasons: s.reasons,
|
|
179
|
-
});
|
|
180
|
-
}
|
|
181
|
-
const updated = consultation.getThread(thread.id);
|
|
182
|
-
const respondents = JSON.parse(updated.expected_respondents || "[]");
|
|
183
|
-
// Emit downgrade event when plan is provided but quality is insufficient
|
|
184
|
-
if (plan && effectiveMode === "discovery") {
|
|
185
|
-
sseEmitter.emit("impact_scored", {
|
|
186
|
-
thread_id: thread.id,
|
|
187
|
-
agent_id: agent_id,
|
|
188
|
-
agent_name: agentInfo?.name || agent_id,
|
|
189
|
-
score: planQuality.score,
|
|
190
|
-
reasons: [`plan downgraded: score ${planQuality.score}/3 — ${!planQuality.checks.mentions_files ? 'no files' : ''} ${!planQuality.checks.concrete_approach ? 'vague approach' : ''} ${!planQuality.checks.sufficient_detail ? 'too short' : ''}`.trim()],
|
|
191
|
-
category: "plan_quality",
|
|
192
|
-
});
|
|
193
|
-
}
|
|
194
|
-
sseEmitter.emit("thread_opened", {
|
|
195
|
-
thread_id: thread.id, subject, agent_id, agent_name: agentInfo?.name || agent_id,
|
|
196
|
-
target_modules, target_files, expected_respondents: respondents,
|
|
197
|
-
conflicts: updated.conflicts ? JSON.parse(updated.conflicts) : [],
|
|
198
|
-
created_at: updated.created_at,
|
|
199
|
-
mode: effectiveMode,
|
|
200
|
-
plan: plan || null,
|
|
201
|
-
plan_quality: planQuality,
|
|
202
|
-
});
|
|
203
|
-
json(res, { thread_id: thread.id, status: updated.status, impact: categorized });
|
|
204
|
-
}
|
|
205
|
-
else if (url === "/api/post-to-thread") {
|
|
206
|
-
const { thread_id, agent_id, agent_name, type, content } = body;
|
|
207
|
-
// Pre-check the thread so we can return actionable status codes instead
|
|
208
|
-
// of always-500 on any error. The client uses the status to decide
|
|
209
|
-
// whether to warn (unexpected) or silently skip (normal race).
|
|
210
|
-
const targetThread = consultation.getThread(thread_id);
|
|
211
|
-
if (!targetThread) {
|
|
212
|
-
json(res, { error: "thread_not_found", thread_id }, 404);
|
|
213
|
-
return;
|
|
214
|
-
}
|
|
215
|
-
if (targetThread.status === "cancelled") {
|
|
216
|
-
json(res, { error: "thread_cancelled", thread_id }, 410);
|
|
217
|
-
return;
|
|
218
|
-
}
|
|
219
|
-
const msg = consultation.postToThread({ thread_id, agent_id, agent_name, type, content });
|
|
220
|
-
const thread = consultation.getThread(thread_id);
|
|
221
|
-
sseEmitter.emit("message_posted", {
|
|
222
|
-
thread_id, agent_id, agent_name: agent_name || agent_id,
|
|
223
|
-
type, content, round: thread?.round || 1,
|
|
224
|
-
token_estimate: msg.token_estimate || 0,
|
|
225
|
-
});
|
|
226
|
-
json(res, msg);
|
|
227
|
-
}
|
|
228
|
-
else if (url === "/api/token-usage") {
|
|
229
|
-
// Agent → coordinator telemetry, emitted once per LLM turn so the dashboard
|
|
230
|
-
// and reports can pinpoint where tokens are being burned.
|
|
231
|
-
const payload = body;
|
|
232
|
-
sseEmitter.emit("token_usage", payload);
|
|
233
|
-
json(res, { ok: true });
|
|
234
|
-
}
|
|
235
|
-
else if (url === "/api/unclaim-task") {
|
|
236
|
-
const { thread_id, agent_id } = body;
|
|
237
|
-
if (!thread_id || !agent_id) {
|
|
238
|
-
json(res, { success: false, error: "thread_id and agent_id required" }, 400);
|
|
239
|
-
return;
|
|
240
|
-
}
|
|
241
|
-
const db = (await import("./database.js")).getDb();
|
|
242
|
-
// F4: increment unclaim counter. After POISON_THRESHOLD aborts, flip status
|
|
243
|
-
// to "poisoned" so no agent claims it again — prevents the tight
|
|
244
|
-
// claim → no DONE → unclaim → re-claim loop we observed on stuck tasks.
|
|
245
|
-
// Only the claiming agent can unclaim to prevent cross-agent interference.
|
|
246
|
-
const POISON_THRESHOLD = 2;
|
|
247
|
-
const result = db.prepare("UPDATE threads SET claimed_by = NULL, claimed_at = NULL, unclaim_count = COALESCE(unclaim_count, 0) + 1 WHERE id = ? AND claimed_by = ? AND status = 'open'").run(thread_id, agent_id);
|
|
248
|
-
let poisoned = false;
|
|
249
|
-
if (result.changes === 1) {
|
|
250
|
-
const row = db.prepare("SELECT unclaim_count FROM threads WHERE id = ?").get(thread_id);
|
|
251
|
-
if (row && (row.unclaim_count ?? 0) >= POISON_THRESHOLD) {
|
|
252
|
-
db.prepare("UPDATE threads SET status = 'poisoned' WHERE id = ? AND status = 'open'").run(thread_id);
|
|
253
|
-
poisoned = true;
|
|
254
|
-
httpLog.warn({ thread_id, unclaim_count: row.unclaim_count }, "thread poisoned after repeated unclaims");
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
|
-
json(res, { success: result.changes === 1, poisoned });
|
|
258
|
-
}
|
|
259
|
-
else if (url === "/api/claim-task") {
|
|
260
|
-
const { thread_id, agent_id } = body;
|
|
261
|
-
if (!thread_id || !agent_id) {
|
|
262
|
-
json(res, { success: false, error: "thread_id and agent_id required" }, 400);
|
|
263
|
-
return;
|
|
264
|
-
}
|
|
265
|
-
const db = (await import("./database.js")).getDb();
|
|
266
|
-
// Only claim threads with status='open' — poisoned threads are filtered out
|
|
267
|
-
// automatically because the status filter excludes them.
|
|
268
|
-
// Directed-dispatch constraint: if assigned_to is set, only that specific
|
|
269
|
-
// agent can claim; NULL keeps the original open-pool semantics.
|
|
270
|
-
const result = db.prepare("UPDATE threads SET claimed_by = ?, claimed_at = ? WHERE id = ? AND claimed_by IS NULL AND status = 'open' AND (assigned_to IS NULL OR assigned_to = ?)").run(agent_id, new Date().toISOString(), thread_id, agent_id);
|
|
271
|
-
if (result.changes === 1) {
|
|
272
|
-
mqttBridge.publishTaskClaimed(thread_id, agent_id);
|
|
273
|
-
sseEmitter.emit("task_claimed", { thread_id, agent_id });
|
|
274
|
-
json(res, { success: true });
|
|
275
|
-
}
|
|
276
|
-
else {
|
|
277
|
-
const thread = consultation.getThread(thread_id);
|
|
278
|
-
// Surface the assigned_to in the 'why not' response so clients can
|
|
279
|
-
// distinguish "already claimed by X" from "reserved for Y".
|
|
280
|
-
json(res, {
|
|
281
|
-
success: false,
|
|
282
|
-
claimed_by: thread?.claimed_by || null,
|
|
283
|
-
assigned_to: thread?.assigned_to || null,
|
|
284
|
-
status: thread?.status,
|
|
285
|
-
});
|
|
286
|
-
}
|
|
287
|
-
}
|
|
288
|
-
else if (url === "/api/propose-resolution") {
|
|
289
|
-
const { thread_id, agent_id, summary } = body;
|
|
290
|
-
const agentInfo = registry.get(agent_id);
|
|
291
|
-
consultation.proposeResolution(thread_id, agent_id, summary);
|
|
292
|
-
sseEmitter.emit("resolution_proposed", {
|
|
293
|
-
thread_id, agent_id, agent_name: agentInfo?.name || agent_id, summary,
|
|
294
|
-
});
|
|
295
|
-
json(res, consultation.getThread(thread_id));
|
|
296
|
-
mqttBridge.publishTaskCompleted(thread_id, agent_id, summary);
|
|
297
|
-
}
|
|
298
|
-
else if (url === "/api/approve-resolution") {
|
|
299
|
-
const { thread_id, agent_id } = body;
|
|
300
|
-
const agentInfo = registry.get(agent_id);
|
|
301
|
-
consultation.approveResolution(thread_id, agent_id, agentInfo?.name);
|
|
302
|
-
const t = consultation.getThread(thread_id);
|
|
303
|
-
json(res, t);
|
|
304
|
-
}
|
|
305
|
-
else if (url?.startsWith("/api/consultation/") && url?.endsWith("/status")) {
|
|
306
|
-
const threadId = url.split("/")[3];
|
|
307
|
-
const thread = consultation.getThreadWithMessages(threadId);
|
|
308
|
-
if (!thread) {
|
|
309
|
-
json(res, { error: "not found" }, 404);
|
|
310
|
-
}
|
|
311
|
-
else {
|
|
312
|
-
json(res, {
|
|
313
|
-
status: thread.thread.status,
|
|
314
|
-
messages: thread.messages,
|
|
315
|
-
resolution_summary: thread.thread.resolution_summary,
|
|
316
|
-
expected_respondents: JSON.parse(thread.thread.expected_respondents || "[]"),
|
|
317
|
-
});
|
|
318
|
-
}
|
|
319
|
-
}
|
|
320
|
-
else if (url === "/api/threads-active") {
|
|
321
|
-
const open = consultation.listThreads({ status: "open" });
|
|
322
|
-
const resolving = consultation.listThreads({ status: "resolving" });
|
|
323
|
-
json(res, [...open, ...resolving]);
|
|
324
|
-
}
|
|
325
|
-
else if (url === "/api/hot-files") {
|
|
326
|
-
const { since_minutes } = body;
|
|
327
|
-
json(res, fileTracker.getHotFiles(since_minutes || 30));
|
|
328
|
-
}
|
|
329
|
-
else if (url === "/api/quota") {
|
|
330
|
-
// Pre-flight + live widget endpoint. 200 with fresh QuotaInfo when the
|
|
331
|
-
// Keychain + Anthropic API are reachable, 503 otherwise. Consumers treat
|
|
332
|
-
// 503 as "quota unknown = proceed" (fail-open) per the project decision.
|
|
333
|
-
const info = await quotaCache.get();
|
|
334
|
-
if (!info) {
|
|
335
|
-
const status = quotaCache.snapshot();
|
|
336
|
-
json(res, {
|
|
337
|
-
error: "quota unavailable",
|
|
338
|
-
reason: status.lastError,
|
|
339
|
-
cooldown_until: status.cooldownUntil,
|
|
340
|
-
}, 503);
|
|
341
|
-
}
|
|
342
|
-
else {
|
|
343
|
-
json(res, {
|
|
344
|
-
five_hour: info.fiveHour,
|
|
345
|
-
seven_day: info.sevenDay,
|
|
346
|
-
seven_day_sonnet: info.sevenDaySonnet,
|
|
347
|
-
fetched_at: info.fetchedAt,
|
|
348
|
-
});
|
|
349
|
-
}
|
|
350
|
-
}
|
|
351
|
-
else if (url === "/api/quota/refresh") {
|
|
352
|
-
// Force-refresh the cache, bypassing the TTL. Used by the dashboard's
|
|
353
|
-
// manual refresh button. The underlying quotaCache.refresh() is single-
|
|
354
|
-
// flight-deduped, so mashing the button doesn't stack parallel fetches.
|
|
355
|
-
// The onRefresh callback on the cache broadcasts via SSE + MQTT, so the
|
|
356
|
-
// dashboard receives the update through the normal channel too — this
|
|
357
|
-
// endpoint only exists for "give me the answer now" semantics.
|
|
358
|
-
const info = await quotaCache.refresh();
|
|
359
|
-
if (!info) {
|
|
360
|
-
const status = quotaCache.snapshot();
|
|
361
|
-
json(res, {
|
|
362
|
-
error: "quota unavailable",
|
|
363
|
-
reason: status.lastError,
|
|
364
|
-
cooldown_until: status.cooldownUntil,
|
|
365
|
-
}, 503);
|
|
366
|
-
}
|
|
367
|
-
else {
|
|
368
|
-
json(res, {
|
|
369
|
-
five_hour: info.fiveHour,
|
|
370
|
-
seven_day: info.sevenDay,
|
|
371
|
-
seven_day_sonnet: info.sevenDaySonnet,
|
|
372
|
-
fetched_at: info.fetchedAt,
|
|
373
|
-
});
|
|
374
|
-
}
|
|
375
|
-
}
|
|
376
|
-
else if (url === "/api/introspection-response") {
|
|
377
|
-
const { introspection_id, concerned, reason } = body;
|
|
378
|
-
const intro = introspection.respond(introspection_id, concerned, reason);
|
|
379
|
-
// If concerned, add to thread's expected_respondents
|
|
380
|
-
if (concerned && intro) {
|
|
381
|
-
const db = (await import("./database.js")).getDb();
|
|
382
|
-
const thread = consultation.getThread(intro.thread_id);
|
|
383
|
-
if (thread && (thread.status === "open" || thread.status === "resolving")) {
|
|
384
|
-
const respondents = JSON.parse(thread.expected_respondents || "[]");
|
|
385
|
-
if (!respondents.includes(intro.agent_id)) {
|
|
386
|
-
respondents.push(intro.agent_id);
|
|
387
|
-
db.prepare("UPDATE threads SET expected_respondents = ? WHERE id = ?")
|
|
388
|
-
.run(JSON.stringify(respondents), thread.id);
|
|
389
|
-
}
|
|
390
|
-
}
|
|
391
|
-
}
|
|
392
|
-
const agentInfo = registry.get(intro?.agent_id || "");
|
|
393
|
-
sseEmitter.emit("introspection_completed", {
|
|
394
|
-
introspection_id, thread_id: intro?.thread_id,
|
|
395
|
-
agent_id: intro?.agent_id, agent_name: agentInfo?.name || intro?.agent_id,
|
|
396
|
-
concerned, reason,
|
|
397
|
-
});
|
|
398
|
-
json(res, intro);
|
|
399
|
-
}
|
|
400
|
-
else if (url?.startsWith("/api/pending-introspections")) {
|
|
401
|
-
const urlObj = new URL(url, "http://localhost");
|
|
402
|
-
const agent_id = urlObj.searchParams.get("agent_id") || "";
|
|
403
|
-
const pending = introspection.getPending(agent_id);
|
|
404
|
-
json(res, pending);
|
|
405
|
-
}
|
|
406
|
-
else if (url === "/api/run-config") {
|
|
407
|
-
if (req.method === "POST") {
|
|
408
|
-
currentRunConfig = body;
|
|
409
|
-
sseEmitter.emit("run_config", currentRunConfig);
|
|
410
|
-
json(res, { ok: true });
|
|
411
|
-
}
|
|
412
|
-
else {
|
|
413
|
-
json(res, currentRunConfig || { active: false });
|
|
414
|
-
}
|
|
415
|
-
}
|
|
416
|
-
else if (url === "/api/reset") {
|
|
417
|
-
// Reset all tables for clean test run (disable FK checks to avoid ordering issues)
|
|
418
|
-
const db = (await import("./database.js")).getDb();
|
|
419
|
-
db.exec("PRAGMA foreign_keys = OFF");
|
|
420
|
-
db.exec("DELETE FROM introspections");
|
|
421
|
-
db.exec("DELETE FROM events");
|
|
422
|
-
db.exec("DELETE FROM thread_messages");
|
|
423
|
-
db.exec("DELETE FROM threads");
|
|
424
|
-
db.exec("DELETE FROM action_summaries");
|
|
425
|
-
db.exec("DELETE FROM file_activity");
|
|
426
|
-
db.exec("DELETE FROM agent_activity_status");
|
|
427
|
-
db.exec("DELETE FROM dependency_map");
|
|
428
|
-
db.exec("DELETE FROM agents");
|
|
429
|
-
db.exec("DELETE FROM revoked_agents");
|
|
430
|
-
db.exec("PRAGMA foreign_keys = ON");
|
|
431
|
-
currentRunConfig = null;
|
|
432
|
-
json(res, { ok: true });
|
|
433
|
-
}
|
|
434
|
-
else if (url === "/api/check-interrupt") {
|
|
435
|
-
const { agent_id } = body;
|
|
436
|
-
// Check for threads where this agent is an expected respondent and hasn't posted yet.
|
|
437
|
-
// Covers both open threads (waiting for initial response) and resolving threads
|
|
438
|
-
// (waiting for approval/contest of a proposed resolution).
|
|
439
|
-
const pendingThreads = [
|
|
440
|
-
...consultation.listThreads({ status: "open" }),
|
|
441
|
-
...consultation.listThreads({ status: "resolving" }),
|
|
442
|
-
].filter((t) => {
|
|
443
|
-
const respondents = JSON.parse(t.expected_respondents || "[]");
|
|
444
|
-
return respondents.includes(agent_id);
|
|
445
|
-
});
|
|
446
|
-
if (pendingThreads.length > 0) {
|
|
447
|
-
const details = pendingThreads.map((t) => ({
|
|
448
|
-
thread_id: t.id,
|
|
449
|
-
subject: t.subject,
|
|
450
|
-
initiator_id: t.initiator_id,
|
|
451
|
-
status: t.status,
|
|
452
|
-
target_files: JSON.parse(t.target_files || "[]"),
|
|
453
|
-
}));
|
|
454
|
-
json(res, { interrupt: true, threads: details });
|
|
455
|
-
}
|
|
456
|
-
else {
|
|
457
|
-
json(res, { interrupt: false });
|
|
458
|
-
}
|
|
459
|
-
}
|
|
460
|
-
else if (url?.startsWith("/api/agent-status/")) {
|
|
461
|
-
const agentId = url.split("/")[3];
|
|
462
|
-
const agent = registry.get(agentId);
|
|
463
|
-
if (!agent) {
|
|
464
|
-
json(res, { registered: false, status: "unknown" });
|
|
465
|
-
}
|
|
466
|
-
else {
|
|
467
|
-
const activity = activityTracker.getActivity(agentId, { idleAfterMinutes: 5 });
|
|
468
|
-
json(res, { registered: true, status: agent.status, activity: activity.activity_status });
|
|
469
|
-
}
|
|
470
|
-
}
|
|
471
|
-
else if (url === "/api/status") {
|
|
472
|
-
const online = registry.listOnline();
|
|
473
|
-
const openThreads = consultation.listThreads({ status: "open" });
|
|
474
|
-
json(res, {
|
|
475
|
-
online: online.length,
|
|
476
|
-
open_threads: openThreads.length,
|
|
477
|
-
hot_files: fileTracker.getHotFiles(30).length,
|
|
478
|
-
mqtt: services.mqttBridge.isConnected(),
|
|
479
|
-
});
|
|
480
|
-
}
|
|
481
|
-
else {
|
|
482
|
-
json(res, { error: "not found" }, 404);
|
|
483
|
-
}
|
|
75
|
+
const ctx = {
|
|
76
|
+
services,
|
|
77
|
+
httpLog,
|
|
78
|
+
authEnabled: AUTH_ENABLED,
|
|
79
|
+
getRunConfig: () => currentRunConfig,
|
|
80
|
+
setRunConfig: (cfg) => { currentRunConfig = cfg; },
|
|
81
|
+
};
|
|
82
|
+
return handleRestExt(req, res, ctx);
|
|
484
83
|
}
|
|
485
84
|
async function handleAuth(req, res) {
|
|
486
85
|
const url = req.url || "";
|
|
@@ -568,6 +167,16 @@ function writeSseEvent(res, event) {
|
|
|
568
167
|
const data = injectTimestamp(event.payload, event.created_at ?? new Date().toISOString());
|
|
569
168
|
res.write(`id: ${event.id}\nevent: ${event.type}\ndata: ${data}\n\n`);
|
|
570
169
|
}
|
|
170
|
+
// P3: heartbeat interval in ms. Default 30s — well under nginx/Cloudflare's
|
|
171
|
+
// typical 60s idle SSE timeout, but infrequent enough to add negligible
|
|
172
|
+
// bandwidth (one ":keep-alive\n\n" comment is ~16 bytes).
|
|
173
|
+
const SSE_HEARTBEAT_MS = (() => {
|
|
174
|
+
const raw = process.env.COORDINATOR_SSE_HEARTBEAT_MS;
|
|
175
|
+
if (!raw)
|
|
176
|
+
return 30_000;
|
|
177
|
+
const n = parseInt(raw, 10);
|
|
178
|
+
return Number.isFinite(n) && n > 0 ? n : 30_000;
|
|
179
|
+
})();
|
|
571
180
|
function handleSse(req, res) {
|
|
572
181
|
res.writeHead(200, {
|
|
573
182
|
"Content-Type": "text/event-stream",
|
|
@@ -587,11 +196,35 @@ function handleSse(req, res) {
|
|
|
587
196
|
const unsubscribe = services.sseEmitter.addListener((event) => {
|
|
588
197
|
writeSseEvent(res, event);
|
|
589
198
|
});
|
|
590
|
-
|
|
199
|
+
// P3: heartbeat. Browsers ignore the `:` comment line per the SSE spec,
|
|
200
|
+
// but it counts as activity for intermediate proxies that would otherwise
|
|
201
|
+
// kill an idle connection after ~60s. Wrapped in try/catch because once
|
|
202
|
+
// the socket is half-closed res.write throws synchronously.
|
|
203
|
+
const heartbeat = setInterval(() => {
|
|
204
|
+
try {
|
|
205
|
+
res.write(":keep-alive\n\n");
|
|
206
|
+
}
|
|
207
|
+
catch {
|
|
208
|
+
// Connection already torn down — req.on("close") will clean up shortly.
|
|
209
|
+
}
|
|
210
|
+
}, SSE_HEARTBEAT_MS);
|
|
211
|
+
// Don't keep the event loop alive solely for heartbeats; without unref()
|
|
212
|
+
// a still-open SSE connection at process shutdown delays exit.
|
|
213
|
+
if (typeof heartbeat.unref === "function")
|
|
214
|
+
heartbeat.unref();
|
|
215
|
+
req.on("close", () => {
|
|
216
|
+
// P3: clear the interval BEFORE unsubscribing so a heartbeat tick that
|
|
217
|
+
// fires between close and unsubscribe can't write to a dead socket.
|
|
218
|
+
clearInterval(heartbeat);
|
|
219
|
+
unsubscribe();
|
|
220
|
+
});
|
|
591
221
|
}
|
|
592
222
|
export async function startServer(opts) {
|
|
593
223
|
const port = opts?.port ?? PORT;
|
|
594
224
|
const dataDir = opts?.dataDir ?? DATA_DIR;
|
|
225
|
+
// Resolve MQTT ports per-call so tests/embedders can override module-load env values.
|
|
226
|
+
const mqttTcpPort = opts?.mqttTcpPort ?? MQTT_TCP_PORT;
|
|
227
|
+
const mqttWsPath = opts?.mqttWsPath ?? MQTT_WS_PATH;
|
|
595
228
|
services = createServices({ dataDir });
|
|
596
229
|
const log = services.logger;
|
|
597
230
|
httpLog = log.child({ component: "http" });
|
|
@@ -638,10 +271,19 @@ export async function startServer(opts) {
|
|
|
638
271
|
json(res, { error: "dashboard not available" }, 404);
|
|
639
272
|
return;
|
|
640
273
|
}
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
274
|
+
// B5 fix: defend against path traversal. safeJoinUnderRoot decodes the
|
|
275
|
+
// URL, strips leading slashes, resolves the path, and verifies the
|
|
276
|
+
// result stays under dashboardDir. Returns null on traversal attempts.
|
|
277
|
+
let filePath;
|
|
278
|
+
if (url === "/dashboard" || url === "/dashboard/") {
|
|
279
|
+
filePath = path.join(dashboardDir, "index.html");
|
|
280
|
+
}
|
|
281
|
+
else {
|
|
282
|
+
// Strip query string before joining (browsers append ?v=...)
|
|
283
|
+
const urlPath = (url.split("?")[0] || "").replace("/dashboard/", "");
|
|
284
|
+
filePath = safeJoinUnderRoot(dashboardDir, urlPath);
|
|
285
|
+
}
|
|
286
|
+
if (filePath && existsSync(filePath)) {
|
|
645
287
|
const ext = path.extname(filePath);
|
|
646
288
|
const contentTypes = {
|
|
647
289
|
".html": "text/html",
|
|
@@ -739,30 +381,131 @@ export async function startServer(opts) {
|
|
|
739
381
|
// Start the embedded MQTT broker (TCP + WebSocket on HTTP upgrade).
|
|
740
382
|
// Awaiting ensures the TCP listener is fully bound before we connect our
|
|
741
383
|
// own client or tell users the coordinator is ready.
|
|
742
|
-
|
|
743
|
-
|
|
384
|
+
// B3 fix: when AUTH_ENABLED, gate every MQTT CONNECT by JWT in the password
|
|
385
|
+
// field. Anonymous connections are rejected. Default off (essaim and any
|
|
386
|
+
// client without auth keep working unchanged).
|
|
387
|
+
const mqttAuth = AUTH_ENABLED
|
|
388
|
+
? async (_username, password) => {
|
|
389
|
+
if (!password)
|
|
390
|
+
return false;
|
|
391
|
+
try {
|
|
392
|
+
await verifyToken(password.toString("utf-8"));
|
|
393
|
+
return true;
|
|
394
|
+
}
|
|
395
|
+
catch {
|
|
396
|
+
return false;
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
: undefined;
|
|
400
|
+
const broker = await startEmbeddedMqttBroker({
|
|
401
|
+
tcpPort: mqttTcpPort,
|
|
744
402
|
httpServer,
|
|
745
|
-
wsPath:
|
|
403
|
+
wsPath: mqttWsPath,
|
|
746
404
|
logger: log.child({ component: "mqtt-broker" }),
|
|
405
|
+
authenticate: mqttAuth,
|
|
406
|
+
});
|
|
407
|
+
// B3: when AUTH_ENABLED, the internal coordinator client must authenticate
|
|
408
|
+
// too. Mint a short-lived (1h expiry) admin token for the bridge.
|
|
409
|
+
const internalToken = AUTH_ENABLED ? await createToken("coordinator-internal", "admin", "1h") : undefined;
|
|
410
|
+
await services.mqttBridge.connect({
|
|
411
|
+
url: `mqtt://127.0.0.1:${mqttTcpPort}`,
|
|
412
|
+
username: AUTH_ENABLED ? "coordinator-internal" : undefined,
|
|
413
|
+
password: internalToken,
|
|
414
|
+
// P1 fix: stable agent identity for LWT topic
|
|
415
|
+
// (`coordinator/agents/coordinator-internal/status`).
|
|
416
|
+
agentId: "coordinator-internal",
|
|
747
417
|
});
|
|
748
|
-
// Connect the coordinator's own MQTT client to the embedded broker BEFORE
|
|
749
|
-
// the HTTP server accepts requests — agents shouldn't see a half-ready coordinator.
|
|
750
|
-
await services.mqttBridge.connect({ url: `mqtt://127.0.0.1:${MQTT_TCP_PORT}` });
|
|
751
418
|
services.mqttBridge.onOffline((agentId) => {
|
|
752
419
|
services.registry.setOffline(agentId);
|
|
753
420
|
services.consultation.handleAgentDeparture(agentId);
|
|
754
421
|
services.sseEmitter.emit("agent_offline", { agent_id: agentId });
|
|
755
422
|
});
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
423
|
+
// Wait for the HTTP server to be actually listening before resolving the
|
|
424
|
+
// returned handle. Otherwise callers (tests, essaim) may try to connect
|
|
425
|
+
// before the port is bound.
|
|
426
|
+
await new Promise((resolve, reject) => {
|
|
427
|
+
const onError = (err) => reject(err);
|
|
428
|
+
httpServer.once("error", onError);
|
|
429
|
+
httpServer.listen(port, () => {
|
|
430
|
+
httpServer.off("error", onError);
|
|
431
|
+
log.info({
|
|
432
|
+
port,
|
|
433
|
+
mcp: `POST http://localhost:${port}/mcp`,
|
|
434
|
+
rest: `POST http://localhost:${port}/api/*`,
|
|
435
|
+
sse: `GET http://localhost:${port}/api/events`,
|
|
436
|
+
mqtt_tcp: `mqtt://127.0.0.1:${mqttTcpPort}`,
|
|
437
|
+
mqtt_ws: `ws://localhost:${port}${mqttWsPath}`,
|
|
438
|
+
}, "Coordinator v3 started");
|
|
439
|
+
resolve();
|
|
440
|
+
});
|
|
765
441
|
});
|
|
442
|
+
// B2 fix: start the consultation timeout sweeper.
|
|
443
|
+
// Reads no longer mutate state — this background tick handles timeouts.
|
|
444
|
+
services.consultation.startTimeoutSweeper();
|
|
445
|
+
// B6 fix: graceful shutdown.
|
|
446
|
+
// Cleanup sequence: stop accepting new HTTP connections → end MQTT bridge →
|
|
447
|
+
// close MQTT broker → stop quota background timer → stop consultation timeout sweeper → close DB.
|
|
448
|
+
// Idempotent: stopped flag prevents double-cleanup if SIGTERM races with
|
|
449
|
+
// an explicit handle.stop() call.
|
|
450
|
+
let stopped = false;
|
|
451
|
+
const stop = async () => {
|
|
452
|
+
if (stopped)
|
|
453
|
+
return;
|
|
454
|
+
stopped = true;
|
|
455
|
+
log.info("Coordinator shutting down...");
|
|
456
|
+
try {
|
|
457
|
+
await new Promise((resolve) => httpServer.close(() => resolve()));
|
|
458
|
+
}
|
|
459
|
+
catch (err) {
|
|
460
|
+
log.warn({ err }, "Error closing HTTP server");
|
|
461
|
+
}
|
|
462
|
+
try {
|
|
463
|
+
await services.mqttBridge.disconnect();
|
|
464
|
+
}
|
|
465
|
+
catch (err) {
|
|
466
|
+
log.warn({ err }, "Error disconnecting MQTT bridge");
|
|
467
|
+
}
|
|
468
|
+
try {
|
|
469
|
+
await broker.close();
|
|
470
|
+
}
|
|
471
|
+
catch (err) {
|
|
472
|
+
log.warn({ err }, "Error closing MQTT broker");
|
|
473
|
+
}
|
|
474
|
+
try {
|
|
475
|
+
services.quotaCache.stopBackgroundTick();
|
|
476
|
+
}
|
|
477
|
+
catch (err) {
|
|
478
|
+
log.warn({ err }, "Error stopping quota timer");
|
|
479
|
+
}
|
|
480
|
+
try {
|
|
481
|
+
services.consultation.stopTimeoutSweeper();
|
|
482
|
+
}
|
|
483
|
+
catch (err) {
|
|
484
|
+
log.warn({ err }, "Error stopping timeout sweeper");
|
|
485
|
+
}
|
|
486
|
+
try {
|
|
487
|
+
const { closeDb } = await import("./database.js");
|
|
488
|
+
closeDb?.();
|
|
489
|
+
}
|
|
490
|
+
catch (err) {
|
|
491
|
+
log.warn({ err }, "Error closing database");
|
|
492
|
+
}
|
|
493
|
+
log.info("Coordinator shutdown complete");
|
|
494
|
+
};
|
|
495
|
+
// Register signal handlers (default true). Embedders can opt out via
|
|
496
|
+
// registerSignalHandlers: false to manage their own teardown.
|
|
497
|
+
if (opts?.registerSignalHandlers !== false) {
|
|
498
|
+
const onSignal = (signal) => {
|
|
499
|
+
log.info({ signal }, "Received shutdown signal");
|
|
500
|
+
stop().then(() => process.exit(0)).catch((err) => {
|
|
501
|
+
log.error({ err }, "Shutdown error, forcing exit");
|
|
502
|
+
process.exit(1);
|
|
503
|
+
});
|
|
504
|
+
};
|
|
505
|
+
process.once("SIGTERM", () => onSignal("SIGTERM"));
|
|
506
|
+
process.once("SIGINT", () => onSignal("SIGINT"));
|
|
507
|
+
}
|
|
508
|
+
return { port, stop };
|
|
766
509
|
}
|
|
767
510
|
// Auto-start when run directly (not imported)
|
|
768
511
|
const isMainModule = process.argv[1]?.endsWith("serve-http.ts") || process.argv[1]?.endsWith("serve-http.js");
|