agent-coord-mcp 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -6
- package/dist/server.js +3 -1
- package/dist/server.js.map +1 -1
- package/dist/tools.js +488 -17
- package/dist/tools.js.map +1 -1
- package/hooks/peek-coord.mjs +5 -1
- package/hooks/tmux-pusher.mjs +73 -4
- package/package.json +1 -1
- package/scripts/coord-pusher.mjs +63 -4
- package/src/server.ts +18 -0
- package/src/tools.ts +509 -15
package/dist/tools.js
CHANGED
|
@@ -5,7 +5,7 @@ import { spawn, spawnSync } from "node:child_process";
|
|
|
5
5
|
import { fileURLToPath } from "node:url";
|
|
6
6
|
import { z } from "zod";
|
|
7
7
|
import path from "node:path";
|
|
8
|
-
import { AGENTS_FILE, CURSOR_DIR, DEFAULT_ROOM, INBOX_DIR, ROOMS_FILE, STATUS_FILE, addMember, appendJsonl, cursorFile, deleteFile, ensureRoom, fileSize, getRooms, inboxFile, listCursorFiles, listInboxFiles, listTransportFiles, logFile, memberRooms, normalizeRoom, pidFile, readJson, readJsonl, removeMember, rewriteJsonl, roomFile, rotateAgentToken, setRoomMeta, transportFile, TRANSPORT_DIR, updateJson, } from "./store.js";
|
|
8
|
+
import { AGENTS_FILE, CURSOR_DIR, DEFAULT_ROOM, INBOX_DIR, ROOT, ROOM_FILE, ROOMS_DIR, ROOMS_FILE, STATUS_FILE, addMember, appendJsonl, cursorFile, deleteFile, ensureRoom, fileSize, getRooms, inboxFile, listCursorFiles, listInboxFiles, listTransportFiles, logFile, memberRooms, normalizeRoom, pidFile, readJson, readJsonl, removeMember, rewriteJsonl, roomFile, rotateAgentToken, setRoomMeta, transportFile, TRANSPORT_DIR, updateJson, } from "./store.js";
|
|
9
9
|
// Resolve the physical file for a (source, agent, channel) tuple.
|
|
10
10
|
function sourceFile(source, agentId, room) {
|
|
11
11
|
if (source === "inbox")
|
|
@@ -152,27 +152,12 @@ export async function listAgentsTool() {
|
|
|
152
152
|
}
|
|
153
153
|
async function loadLiveTransports() {
|
|
154
154
|
const out = new Map();
|
|
155
|
-
// For remote markers we can't pid-check the foreign process — instead we trust
|
|
156
|
-
// the registry's lastHeartbeat, which the remote pusher refreshes every minute.
|
|
157
155
|
const reg = await readJson(AGENTS_FILE, {});
|
|
158
156
|
const now = Date.now();
|
|
159
157
|
for (const fname of await listTransportFiles()) {
|
|
160
158
|
const file = path.join(TRANSPORT_DIR, fname);
|
|
161
159
|
const marker = await readJson(file, null);
|
|
162
|
-
if (!marker) {
|
|
163
|
-
await deleteFile(file);
|
|
164
|
-
continue;
|
|
165
|
-
}
|
|
166
|
-
const isRemote = marker.transport === "tmux-push-remote";
|
|
167
|
-
if (isRemote) {
|
|
168
|
-
const entry = reg[marker.agentId];
|
|
169
|
-
const fresh = !!entry && now - entry.lastHeartbeat < STALE_MS;
|
|
170
|
-
if (!fresh) {
|
|
171
|
-
await deleteFile(file);
|
|
172
|
-
continue;
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
else if (!isPidAlive(marker.pid)) {
|
|
160
|
+
if (!marker || !isMarkerLive(marker, reg, now)) {
|
|
176
161
|
await deleteFile(file);
|
|
177
162
|
continue;
|
|
178
163
|
}
|
|
@@ -180,6 +165,16 @@ async function loadLiveTransports() {
|
|
|
180
165
|
}
|
|
181
166
|
return out;
|
|
182
167
|
}
|
|
168
|
+
// Decide whether a transport marker still describes a live attachment.
//
//   marker - parsed transport marker; reads `transport`, `agentId`, `pid`.
//   reg    - agent registry object keyed by agent id; reads `lastHeartbeat`.
//   now    - current time in epoch milliseconds.
//
// Markers whose transport is "tmux-push-remote" are judged by the age of the
// registry heartbeat for the same agent (must be younger than STALE_MS); per
// the original note their pid belongs to a foreign host and cannot be probed.
// Every other marker is judged by probing its pid with isPidAlive.
function isMarkerLive(marker, reg, now) {
    const isRemote = marker.transport === "tmux-push-remote";
    if (!isRemote) {
        return isPidAlive(marker.pid);
    }
    const registryEntry = reg[marker.agentId];
    if (!registryEntry) {
        // No registry entry means no heartbeat to trust.
        return false;
    }
    return now - registryEntry.lastHeartbeat < STALE_MS;
}
|
|
183
178
|
function isPidAlive(pid) {
|
|
184
179
|
if (!pid || pid <= 0)
|
|
185
180
|
return false;
|
|
@@ -235,6 +230,110 @@ export async function sendMessageTool(args) {
|
|
|
235
230
|
await appendJsonl(target, msg);
|
|
236
231
|
return { ok: true, id: msg.id, target, room: chan };
|
|
237
232
|
}
|
|
233
|
+
// ---------- send_command (context-management control commands) ----------
// The only slash commands a lead may inject into a sub-agent's CLI. Locked on
// purpose: these wipe/compact context (cheap, reversible-by-the-agent), nothing
// that mutates the repo or the bus. Stored WITHOUT the leading slash; the wire
// text is `/${cmd}`.
export const CONTROL_COMMANDS = ["clear", "compact"];
// Transports whose pusher can actually TYPE a slash command into a live CLI.
// A control command is meaningless to a plain MCP poller, so send_command is
// gated to agents currently attached over one of these.
const TMUX_TRANSPORTS = new Set(["tmux-push", "tmux-push-remote"]);
// Normalize "clear" / "/clear" / " /Clear " → "clear".
//
//   raw - the command as supplied by the caller.
//
// Returns the allowlisted command name (lower-case, no leading slash), or null
// when the input is not a string or is not in CONTROL_COMMANDS.
function normalizeControlCommand(raw) {
    // Guard against non-string input (e.g. a direct caller that bypassed the
    // schema): report "not allowlisted" instead of throwing on raw.trim().
    if (typeof raw !== "string") {
        return null;
    }
    const c = raw.trim().replace(/^\/+/, "").toLowerCase();
    return CONTROL_COMMANDS.includes(c) ? c : null;
}
|
|
248
|
+
// Return the subset of live transports whose `transport` value is in
// TMUX_TRANSPORTS (the local and remote tmux-push variants), as a new Map
// keyed the same way as the Map produced by loadLiveTransports.
async function liveTmuxTargets() {
    const liveTransports = await loadLiveTransports();
    const tmuxOnly = [...liveTransports].filter(([, marker]) => TMUX_TRANSPORTS.has(marker.transport));
    return new Map(tmuxOnly);
}
|
|
257
|
+
// Input schema for send_command: a sender, exactly one of `to` / `room`
// (enforced inside the tool, not by the schema), and the command text.
export const sendCommandSchema = {
    from: z.string().min(1),
    to: z.string().optional(),
    room: z.string().optional(),
    command: z.string().min(1),
};
// Inject a context-management slash command into one agent (`to`) or into the
// tmux-attached members of a channel (`room`).
//
// The command must normalize to an entry of CONTROL_COMMANDS. The message is
// appended with `control: true`; per the original design note the pushers
// deliver such messages raw so the receiving CLI runs them as slash commands.
// The call is refused unless the target(s) currently have a live tmux-push
// transport, so a command is never queued for an agent that cannot run it.
//
// Returns `{ ok: false, error }` on any refusal, otherwise `{ ok: true, ... }`
// describing what was written and which agents it is addressed to.
export async function sendCommandTool(args) {
    const { from, to, room, command } = args;
    const cmd = normalizeControlCommand(command);
    if (!cmd) {
        const allowed = CONTROL_COMMANDS.map((c) => "/" + c).join(", ");
        return {
            ok: false,
            error: `unsupported command '${command}'. Allowed: ${allowed}`,
        };
    }
    if (!to && !room) {
        return { ok: false, error: "specify 'to' (a single agent) or 'room' (a channel's tmux-attached members)" };
    }
    if (to && room) {
        return { ok: false, error: "specify only one of 'to' or 'room'" };
    }
    const text = `/${cmd}`;
    const liveTmux = await liveTmuxTargets();
    // Shared message shape; `addressing` contributes either `to` or `room`
    // in the same key position the two branches have always used.
    const buildMessage = (addressing) => ({
        id: randomUUID(),
        ts: Date.now(),
        from,
        ...addressing,
        text,
        control: true,
    });
    // DM: target must itself be tmux-attached.
    if (to) {
        const marker = liveTmux.get(to);
        if (!marker) {
            return {
                ok: false,
                error: `'${to}' has no live tmux-push transport — control commands can only be injected into a tmux session. Attach it (join/attach_agent) or target an attached agent.`,
            };
        }
        const msg = buildMessage({ to });
        const target = inboxFile(to);
        await appendJsonl(target, msg);
        return { ok: true, id: msg.id, command: text, target, delivered: [to], transport: marker.transport };
    }
    // Room: broadcast to every tmux-attached member (never the sender itself).
    const chan = normalizeRoom(room);
    const rooms = await getRooms();
    const members = rooms[chan]?.members ?? [];
    const delivered = [];
    const skipped = [];
    for (const member of members) {
        if (member === from) {
            continue;
        }
        if (liveTmux.has(member)) {
            delivered.push(member);
        }
        else {
            skipped.push(member);
        }
    }
    if (delivered.length === 0) {
        return {
            ok: false,
            error: `no tmux-attached members in #${chan} to receive '${text}' (${members.length} member(s) total). Control commands only fire in a live tmux session.`,
        };
    }
    const msg = buildMessage({ room: chan });
    const target = roomFile(chan);
    await appendJsonl(target, msg);
    return {
        ok: true,
        id: msg.id,
        command: text,
        target,
        room: chan,
        delivered,
        skipped: skipped.length ? skipped : undefined,
    };
}
|
|
238
337
|
// ---------- read_messages ----------
|
|
239
338
|
export const readMessagesSchema = {
|
|
240
339
|
agentId: z.string().min(1),
|
|
@@ -927,6 +1026,378 @@ export async function clearTransportTool(args) {
|
|
|
927
1026
|
const removed = await deleteFile(transportFile(args.agentId));
|
|
928
1027
|
return { ok: true, removed };
|
|
929
1028
|
}
|
|
1029
|
+
// Count non-empty lines vs successfully-parsed entries in a JSONL file.
//
//   file - path of the JSONL file to inspect.
//
// Returns `{ lines, parsed, malformed }` where `lines` counts lines that are
// not blank, `parsed` counts those that JSON.parse accepts, and `malformed`
// is the difference. A missing file yields all zeros.
//
// The original note states that cursor offsets index the PARSED entries (see
// readJsonl, not visible here), so `parsed` is the figure cursor math is
// compared against and `malformed` is the desync risk.
async function scanJsonl(file) {
    let raw;
    try {
        // Read directly instead of existsSync-then-read: the file can be
        // removed between the two calls, which used to surface as a throw.
        raw = await fsp.readFile(file, "utf8");
    }
    catch (err) {
        // ENOENT/ENOTDIR both mean "the path does not exist".
        if (err?.code === "ENOENT" || err?.code === "ENOTDIR") {
            return { lines: 0, parsed: 0, malformed: 0 };
        }
        throw err;
    }
    let lines = 0;
    let parsed = 0;
    for (const line of raw.split("\n")) {
        if (!line.trim())
            continue;
        lines++;
        try {
            JSON.parse(line);
            parsed++;
        }
        catch {
            // malformed — counted via lines - parsed below
        }
    }
    return { lines, parsed, malformed: lines - parsed };
}
|
|
1052
|
+
// Collect leftover `*.lock` entries in the state directories (ROOT, INBOX_DIR,
// CURSOR_DIR, ROOMS_DIR, TRANSPORT_DIR) whose mtime is older than a threshold.
//
//   olderThanMs - minimum age, in milliseconds, for an entry to be reported.
//   now         - current time in epoch milliseconds.
//
// Returns an array of `{ path, ageMs }`. Directories that do not exist or
// cannot be listed are skipped, as are entries that disappear before they can
// be stat'ed. Per the original note these are proper-lockfile lock dirs and
// withLock's stale window is 5s, so anything well past that is presumed to be
// orphaned by a crashed writer.
async function scanStaleLocks(olderThanMs, now) {
    const staleLocks = [];
    for (const stateDir of [ROOT, INBOX_DIR, CURSOR_DIR, ROOMS_DIR, TRANSPORT_DIR]) {
        if (!existsSync(stateDir)) {
            continue;
        }
        let entries;
        try {
            entries = await fsp.readdir(stateDir);
        }
        catch {
            continue;
        }
        const lockPaths = entries
            .filter((entry) => entry.endsWith(".lock"))
            .map((entry) => path.join(stateDir, entry));
        for (const lockPath of lockPaths) {
            let info;
            try {
                info = await fsp.stat(lockPath);
            }
            catch {
                // vanished mid-scan
                continue;
            }
            const ageMs = now - info.mtimeMs;
            if (ageMs > olderThanMs) {
                staleLocks.push({ path: lockPath, ageMs });
            }
        }
    }
    return staleLocks;
}
|
|
1085
|
+
// Input schema for the doctor tool.
//   fix          - when true, apply the repair for every fixable finding.
//   maxFileBytes - size threshold for the oversized-files check (default 5 MiB).
export const doctorSchema = {
    fix: z.boolean().optional(),
    maxFileBytes: z.number().int().positive().optional(),
};
// Run a series of health checks over the coordination state and optionally
// repair what can be repaired.
//
// Each check appends one finding `{ check, level, detail, fixable, items }`
// with level "ok" | "warn" | "error". When `args.fix` is true, fixable checks
// also mutate state and append a human-readable line to `fixed`.
//
// Returns `{ ok, healthy, fixApplied, root, findings, fixed, summary }`.
// `healthy` reflects the findings as detected, i.e. before any fix applied in
// the same run; `fixed` is undefined when `fix` is false.
export async function doctorTool(args) {
    const fix = args.fix ?? false;
    const maxBytes = args.maxFileBytes ?? 5 * 1024 * 1024;
    const now = Date.now();
    const findings = [];
    const fixed = [];
    const reg = await readJson(AGENTS_FILE, {});
    const known = new Set(Object.keys(reg));
    const rooms = await getRooms();
    const channels = Object.keys(rooms);
    // 1. Orphan transport markers (dead local pid, or stale remote heartbeat).
    {
        const dead = [];
        for (const fname of await listTransportFiles()) {
            const file = path.join(TRANSPORT_DIR, fname);
            const marker = await readJson(file, null);
            if (!marker || !isMarkerLive(marker, reg, now)) {
                dead.push(file);
                if (fix) {
                    await deleteFile(file);
                    fixed.push(`deleted stale transport marker ${fname}`);
                }
            }
        }
        findings.push({
            check: "orphan-transport-markers",
            level: dead.length ? "warn" : "ok",
            detail: dead.length ? `${dead.length} stale transport marker(s) (dead pid or expired remote heartbeat)` : "no stale transport markers",
            fixable: true,
            items: dead.length ? dead.map((f) => path.basename(f)) : undefined,
        });
    }
    // 2. Orphan room memberships (member not in the registry).
    {
        const orphans = new Set();
        for (const e of Object.values(rooms)) {
            for (const m of e.members ?? [])
                if (!known.has(m))
                    orphans.add(m);
        }
        if (fix && orphans.size) {
            await updateJson(ROOMS_FILE, {}, (cur) => {
                for (const e of Object.values(cur)) {
                    if (e.members?.length)
                        e.members = e.members.filter((m) => known.has(m));
                }
                return cur;
            });
            fixed.push(`dropped ${orphans.size} orphan membership(s): ${[...orphans].join(", ")}`);
        }
        findings.push({
            check: "orphan-room-memberships",
            level: orphans.size ? "warn" : "ok",
            detail: orphans.size ? `${orphans.size} channel member(s) not in the registry` : "all channel members are registered",
            fixable: true,
            items: orphans.size ? [...orphans] : undefined,
        });
    }
    // 3. Orphan inbox / cursor files (owner not registered).
    {
        const orphanInbox = [];
        for (const fname of await listInboxFiles()) {
            const id = fname.replace(/\.jsonl$/, "");
            if (!known.has(id)) {
                orphanInbox.push(id);
                if (fix) {
                    await deleteFile(path.join(INBOX_DIR, fname));
                    fixed.push(`deleted orphan inbox ${fname}`);
                }
            }
        }
        const orphanCursor = [];
        for (const fname of await listCursorFiles()) {
            const id = fname.replace(/\.json$/, "");
            if (!known.has(id)) {
                orphanCursor.push(id);
                if (fix) {
                    await deleteFile(path.join(CURSOR_DIR, fname));
                    fixed.push(`deleted orphan cursor ${fname}`);
                }
            }
        }
        const total = orphanInbox.length + orphanCursor.length;
        findings.push({
            check: "orphan-inboxes-cursors",
            level: total ? "warn" : "ok",
            detail: total
                ? `${orphanInbox.length} inbox + ${orphanCursor.length} cursor file(s) for unregistered ids`
                : "no orphan inbox/cursor files",
            fixable: true,
            // An id may own both an orphan inbox and an orphan cursor; list it once.
            items: total ? [...new Set([...orphanInbox, ...orphanCursor])] : undefined,
        });
    }
    // Precompute parsed line counts for cursor + malformed checks.
    // Memoized per path so each JSONL file is read at most once per run.
    const counts = new Map();
    const countFor = async (file) => {
        if (!counts.has(file))
            counts.set(file, await scanJsonl(file));
        return counts.get(file);
    };
    // 4. Cursor offsets past end-of-file (would return [] forever).
    {
        const broken = [];
        for (const fname of await listCursorFiles()) {
            const id = fname.replace(/\.json$/, "");
            const cursorPath = path.join(CURSOR_DIR, fname);
            const cursor = await readJson(cursorPath, {});
            const overflow = [];
            const inboxMax = (await countFor(inboxFile(id))).parsed;
            if ((cursor.inboxOffset ?? 0) > inboxMax)
                overflow.push(`inboxOffset ${cursor.inboxOffset}>${inboxMax}`);
            const roomMax = (await countFor(ROOM_FILE)).parsed;
            if ((cursor.roomOffset ?? 0) > roomMax)
                overflow.push(`roomOffset ${cursor.roomOffset}>${roomMax}`);
            const statusMax = (await countFor(STATUS_FILE)).parsed;
            if ((cursor.statusOffset ?? 0) > statusMax)
                overflow.push(`statusOffset ${cursor.statusOffset}>${statusMax}`);
            for (const [chan, off] of Object.entries(cursor.roomOffsets ?? {})) {
                const max = (await countFor(roomFile(chan))).parsed;
                if (off > max)
                    overflow.push(`roomOffsets[${chan}] ${off}>${max}`);
            }
            if (overflow.length) {
                broken.push(`${id}: ${overflow.join(", ")}`);
                if (fix) {
                    await updateJson(cursorPath, {}, (c) => {
                        if ((c.inboxOffset ?? 0) > inboxMax)
                            c.inboxOffset = inboxMax;
                        if ((c.roomOffset ?? 0) > roomMax)
                            c.roomOffset = roomMax;
                        if ((c.statusOffset ?? 0) > statusMax)
                            c.statusOffset = statusMax;
                        if (c.roomOffsets) {
                            for (const chan of Object.keys(c.roomOffsets)) {
                                // `c` is re-read by updateJson and may carry a channel
                                // that was absent from the snapshot scanned above.
                                // Without a scanned count there is no known EOF, so
                                // leave that offset alone rather than clamping it to 0
                                // (which would reset a valid cursor).
                                const scanned = counts.get(roomFile(chan));
                                if (!scanned)
                                    continue;
                                if (c.roomOffsets[chan] > scanned.parsed)
                                    c.roomOffsets[chan] = scanned.parsed;
                            }
                        }
                        return c;
                    });
                    fixed.push(`clamped cursor offsets for ${id}`);
                }
            }
        }
        findings.push({
            check: "cursor-past-eof",
            level: broken.length ? "error" : "ok",
            detail: broken.length ? `${broken.length} cursor(s) with an offset past EOF — delivery stalled` : "all cursor offsets are within bounds",
            fixable: true,
            items: broken.length ? broken : undefined,
        });
    }
    // 5. Malformed JSONL lines (silently desync offset math between server + hooks).
    {
        const jsonlFiles = [
            ROOM_FILE,
            STATUS_FILE,
            ...channels.filter((c) => c !== DEFAULT_ROOM).map((c) => roomFile(c)),
            ...(await listInboxFiles()).map((f) => path.join(INBOX_DIR, f)),
        ];
        const bad = [];
        for (const file of jsonlFiles) {
            const c = await countFor(file);
            if (c.malformed > 0) {
                bad.push(`${path.basename(file)} (${c.malformed})`);
                if (fix) {
                    // Keep a copy of the untouched file before rewriting it.
                    await fsp.copyFile(file, file + ".bak");
                    await rewriteJsonl(file, () => true); // drops unparseable lines
                    fixed.push(`rewrote ${path.basename(file)} dropping ${c.malformed} malformed line(s) (backup: ${path.basename(file)}.bak)`);
                }
            }
        }
        findings.push({
            check: "malformed-jsonl",
            level: bad.length ? "warn" : "ok",
            detail: bad.length ? `${bad.length} file(s) contain unparseable lines` : "no malformed JSONL lines",
            fixable: true,
            items: bad.length ? bad : undefined,
        });
    }
    // 6. Stale agents (registered, no live transport, heartbeat past EVICT_MS). Report only.
    {
        // Compute liveness WITHOUT deleting dead markers — loadLiveTransports
        // prunes as a side effect, which would make this read-only check mutate
        // state (and pre-empt the orphan-marker fix in check 1).
        const live = new Set();
        for (const fname of await listTransportFiles()) {
            const marker = await readJson(path.join(TRANSPORT_DIR, fname), null);
            if (marker && isMarkerLive(marker, reg, now))
                live.add(marker.agentId);
        }
        const stale = [];
        for (const [id, a] of Object.entries(reg)) {
            if (live.has(id))
                continue;
            if (now - a.lastHeartbeat > EVICT_MS)
                stale.push(`${id} (${Math.floor((now - a.lastHeartbeat) / 3600000)}h)`);
        }
        findings.push({
            check: "stale-agents",
            level: stale.length ? "warn" : "ok",
            detail: stale.length ? `${stale.length} agent(s) past the eviction window — next list_agents will drop them` : "no stale agents",
            fixable: false,
            items: stale.length ? stale : undefined,
        });
    }
    // 7. Oversized JSONL files. Report only (suggest prune).
    {
        const big = [];
        const candidates = [
            ROOM_FILE,
            STATUS_FILE,
            ...channels.filter((c) => c !== DEFAULT_ROOM).map((c) => roomFile(c)),
            ...(await listInboxFiles()).map((f) => path.join(INBOX_DIR, f)),
        ];
        for (const file of candidates) {
            const sz = await fileSize(file);
            if (sz > maxBytes)
                big.push(`${path.basename(file)} (${(sz / 1024 / 1024).toFixed(1)}MB)`);
        }
        findings.push({
            check: "oversized-files",
            level: big.length ? "warn" : "ok",
            detail: big.length ? `${big.length} file(s) over ${(maxBytes / 1024 / 1024).toFixed(0)}MB — consider prune` : "no oversized files",
            fixable: false,
            items: big.length ? big : undefined,
        });
    }
    // 8. Stale lock dirs from crashed writers.
    {
        const locks = await scanStaleLocks(60_000, now);
        for (const l of locks) {
            if (fix) {
                try {
                    await fsp.rm(l.path, { recursive: true, force: true });
                    fixed.push(`removed stale lock ${path.basename(l.path)}`);
                }
                catch {
                    // Best effort: a lock that cannot be removed stays listed in
                    // the finding below and is simply absent from `fixed`.
                }
            }
        }
        findings.push({
            check: "stale-locks",
            level: locks.length ? "warn" : "ok",
            detail: locks.length ? `${locks.length} lock dir(s) older than 60s — likely from a crashed writer` : "no stale locks",
            fixable: true,
            items: locks.length ? locks.map((l) => `${path.basename(l.path)} (${Math.floor(l.ageMs / 1000)}s)`) : undefined,
        });
    }
    // 9. Channel/registry consistency: rooms/<chan>.jsonl files without a registry entry.
    {
        const orphanFiles = [];
        if (existsSync(ROOMS_DIR)) {
            let names = [];
            try {
                names = await fsp.readdir(ROOMS_DIR);
            }
            catch {
                // Unreadable directory: treat as empty, nothing to reconcile.
            }
            for (const name of names) {
                if (!name.endsWith(".jsonl"))
                    continue;
                const chan = name.replace(/\.jsonl$/, "");
                if (!rooms[chan]) {
                    orphanFiles.push(name);
                    if (fix) {
                        await ensureRoom(chan, "doctor");
                        fixed.push(`registered channel '${chan}' (had a JSONL file but no registry entry)`);
                    }
                }
            }
        }
        findings.push({
            check: "channel-registry-consistency",
            level: orphanFiles.length ? "warn" : "ok",
            detail: orphanFiles.length ? `${orphanFiles.length} channel file(s) with no registry entry` : "channel files and registry agree",
            fixable: true,
            items: orphanFiles.length ? orphanFiles : undefined,
        });
    }
    // 10. Environment sanity. Report only.
    {
        // `tmux -V` exiting 0 is used as the "tmux is on PATH" signal.
        const tmuxProbe = spawnSync("tmux", ["-V"]);
        const tmuxOk = tmuxProbe.status === 0;
        findings.push({
            check: "environment",
            level: tmuxOk ? "ok" : "warn",
            detail: tmuxOk
                ? `root=${ROOT}; node=${process.execPath}; tmux=${(tmuxProbe.stdout ?? "").toString().trim() || "present"}`
                : `root=${ROOT}; node=${process.execPath}; tmux NOT on PATH — the tmux-push transport will not work`,
            fixable: false,
            items: [`root=${ROOT}`, `execPath=${process.execPath}`, `inTmux=${!!process.env.TMUX_PANE}`],
        });
    }
    const summary = {
        ok: findings.filter((f) => f.level === "ok").length,
        warn: findings.filter((f) => f.level === "warn").length,
        error: findings.filter((f) => f.level === "error").length,
    };
    return {
        ok: true,
        healthy: summary.warn === 0 && summary.error === 0,
        fixApplied: fix,
        root: ROOT,
        findings,
        fixed: fix ? fixed : undefined,
        summary,
    };
}
|
|
930
1401
|
// ---------- helpers ----------
|
|
931
1402
|
async function moveFile(from, to) {
|
|
932
1403
|
if (!existsSync(from))
|