agent-coord-mcp 0.5.3 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +97 -8
- package/dist/server.js +236 -25
- package/dist/server.js.map +1 -1
- package/dist/store.js +50 -1
- package/dist/store.js.map +1 -1
- package/dist/tools.js +424 -6
- package/dist/tools.js.map +1 -1
- package/package.json +4 -3
- package/scripts/coord-pusher.mjs +288 -0
- package/src/server.ts +279 -26
- package/src/store.ts +56 -1
- package/src/tools.ts +444 -5
package/dist/tools.js
CHANGED
|
@@ -5,7 +5,7 @@ import { spawn, spawnSync } from "node:child_process";
|
|
|
5
5
|
import { fileURLToPath } from "node:url";
|
|
6
6
|
import { z } from "zod";
|
|
7
7
|
import path from "node:path";
|
|
8
|
-
import { AGENTS_FILE, CURSOR_DIR, DEFAULT_ROOM, INBOX_DIR, ROOMS_FILE, STATUS_FILE, addMember, appendJsonl, cursorFile, deleteFile, ensureRoom, fileSize, getRooms, inboxFile, listCursorFiles, listInboxFiles, listTransportFiles, logFile, memberRooms, normalizeRoom, pidFile, readJson, readJsonl, removeMember, rewriteJsonl, roomFile, setRoomMeta, transportFile, TRANSPORT_DIR, updateJson, } from "./store.js";
|
|
8
|
+
import { AGENTS_FILE, CURSOR_DIR, DEFAULT_ROOM, INBOX_DIR, ROOT, ROOM_FILE, ROOMS_DIR, ROOMS_FILE, STATUS_FILE, addMember, appendJsonl, cursorFile, deleteFile, ensureRoom, fileSize, getRooms, inboxFile, listCursorFiles, listInboxFiles, listTransportFiles, logFile, memberRooms, normalizeRoom, pidFile, readJson, readJsonl, removeMember, rewriteJsonl, roomFile, rotateAgentToken, setRoomMeta, transportFile, TRANSPORT_DIR, updateJson, } from "./store.js";
|
|
9
9
|
// Resolve the physical file for a (source, agent, channel) tuple.
|
|
10
10
|
function sourceFile(source, agentId, room) {
|
|
11
11
|
if (source === "inbox")
|
|
@@ -152,14 +152,12 @@ export async function listAgentsTool() {
|
|
|
152
152
|
}
|
|
153
153
|
async function loadLiveTransports() {
|
|
154
154
|
const out = new Map();
|
|
155
|
+
const reg = await readJson(AGENTS_FILE, {});
|
|
156
|
+
const now = Date.now();
|
|
155
157
|
for (const fname of await listTransportFiles()) {
|
|
156
158
|
const file = path.join(TRANSPORT_DIR, fname);
|
|
157
159
|
const marker = await readJson(file, null);
|
|
158
|
-
if (!marker) {
|
|
159
|
-
await deleteFile(file);
|
|
160
|
-
continue;
|
|
161
|
-
}
|
|
162
|
-
if (!isPidAlive(marker.pid)) {
|
|
160
|
+
if (!marker || !isMarkerLive(marker, reg, now)) {
|
|
163
161
|
await deleteFile(file);
|
|
164
162
|
continue;
|
|
165
163
|
}
|
|
@@ -167,6 +165,16 @@ async function loadLiveTransports() {
|
|
|
167
165
|
}
|
|
168
166
|
return out;
|
|
169
167
|
}
|
|
168
|
+
// Decide whether a transport marker still describes a live attachment.
//
// marker - parsed marker object (reads .transport, .agentId, .pid)
// reg    - agent registry keyed by agent id (reads .lastHeartbeat)
// now    - current time in ms, passed in so one run uses a single clock value
//
// A local marker records a pid that can be probed on this host. A
// "tmux-push-remote" marker belongs to a pusher on a foreign host (pid 0), so
// there is nothing to probe; for those the registry heartbeat that the remote
// pusher refreshes is trusted instead, and it must be younger than STALE_MS.
function isMarkerLive(marker, reg, now) {
    const isRemote = marker.transport === "tmux-push-remote";
    if (!isRemote) {
        return isPidAlive(marker.pid);
    }
    const registered = reg[marker.agentId];
    if (!registered) {
        // Remote marker for an agent the registry does not know: not live.
        return false;
    }
    const heartbeatAge = now - registered.lastHeartbeat;
    return heartbeatAge < STALE_MS;
}
|
|
170
178
|
function isPidAlive(pid) {
|
|
171
179
|
if (!pid || pid <= 0)
|
|
172
180
|
return false;
|
|
@@ -861,6 +869,10 @@ export async function renameAgentTool(args) {
|
|
|
861
869
|
await moveFile(inboxFile(oldId), inboxFile(newId));
|
|
862
870
|
await moveFile(cursorFile(oldId), cursorFile(newId));
|
|
863
871
|
await moveFile(transportFile(oldId), transportFile(newId));
|
|
872
|
+
// Identity-binding token rotation: if tokens.json exists and had the old
|
|
873
|
+
// id, move its token to the new id atomically. Lets the same bearer keep
|
|
874
|
+
// authenticating after rename — no-op if binding isn't configured.
|
|
875
|
+
await rotateAgentToken(oldId, newId);
|
|
864
876
|
// Broadcast a NICK notice to every channel the agent was in.
|
|
865
877
|
for (const chan of joined) {
|
|
866
878
|
await appendJsonl(roomFile(chan), sysMsg(newId, chan, `is now known as ${newId} (was ${oldId})`));
|
|
@@ -876,6 +888,412 @@ export async function renameAgentTool(args) {
|
|
|
876
888
|
: {}),
|
|
877
889
|
};
|
|
878
890
|
}
|
|
891
|
+
// ---------- transport markers (for remote pushers) ----------
|
|
892
|
+
// Argument schema matching the fields reportTransportTool reads from `args`.
export const reportTransportSchema = {
    // Required, non-empty: the agent the marker is written for.
    agentId: z.string().min(1),
    // Required, non-empty: transport name stored verbatim in the marker.
    transport: z.string().min(1),
    // Optional string copied into the marker as-is.
    tmuxTarget: z.string().optional(),
    // Optional string copied into the marker as-is.
    host: z.string().optional(),
    // Optional number; reportTransportTool substitutes Date.now() when absent.
    since: z.number().optional(),
};
|
|
899
|
+
// Wire-callable way to publish a transport marker. Intended for an external
// push daemon (typically scripts/coord-pusher.mjs running on a remote machine)
// so that list_agents reflects the attachment; the local tmux-push path writes
// its marker directly inside attach_agent and does not go through here.
//
// args - { agentId, transport, tmuxTarget?, host?, since? }
// Returns { ok: true, marker } where `marker` is the object that was stored.
export async function reportTransportTool(args) {
    const { agentId, transport, tmuxTarget, host, since } = args;
    const marker = {
        agentId,
        transport,
        // A remote pusher has no pid that is meaningful on this host, so 0 is
        // recorded; liveness for such markers comes from the heartbeat.
        pid: 0,
        tmuxTarget,
        host,
        since: since ?? Date.now(),
    };
    const markerPath = transportFile(agentId);
    // The updater ignores its input and returns the new marker, so whatever
    // was stored before is presumably replaced wholesale.
    await updateJson(markerPath, marker, () => marker);
    return { ok: true, marker };
}
|
|
915
|
+
// Argument schema matching the single field clearTransportTool reads.
export const clearTransportSchema = {
    // Required, non-empty: the agent whose transport marker should be removed.
    agentId: z.string().min(1),
};
|
|
918
|
+
// Remote counterpart to detach_agent: removes the agent's transport marker and
// nothing else. A remote pusher calls this on graceful shutdown so list_agents
// stops showing the agent as attached. No process is killed, because a remote
// pusher has nothing running locally to kill.
//
// args - { agentId }
// Returns { ok: true, removed } where `removed` is deleteFile's result.
export async function clearTransportTool(args) {
    const { agentId } = args;
    const markerPath = transportFile(agentId);
    const removed = await deleteFile(markerPath);
    return { ok: true, removed };
}
|
|
925
|
+
// Count the non-empty lines of a JSONL file and how many of them parse as JSON.
//
// file - path of the JSONL file to scan.
// Returns { lines, parsed, malformed }:
//   lines     - lines that contain something other than whitespace
//   parsed    - lines JSON.parse accepted; offsets index the PARSED entries
//               (see readJsonl), so this is the figure cursor math is
//               compared against
//   malformed - lines - parsed, i.e. the desync risk
// A missing file yields all zeros. Any other read error is rethrown.
async function scanJsonl(file) {
    const empty = () => ({ lines: 0, parsed: 0, malformed: 0 });
    if (!existsSync(file)) {
        return empty();
    }
    let raw;
    try {
        raw = await fsp.readFile(file, "utf8");
    }
    catch (err) {
        // The file can disappear between the existence check and the read
        // (another writer, or an earlier doctor fix step). Treat that the
        // same as "missing" instead of failing the whole scan.
        if (err?.code === "ENOENT") {
            return empty();
        }
        throw err;
    }
    let lines = 0;
    let parsed = 0;
    for (const line of raw.split("\n")) {
        if (!line.trim()) {
            continue;
        }
        lines++;
        try {
            JSON.parse(line);
            parsed++;
        }
        catch {
            // Unparseable line: counted in `lines` only, so it ends up in
            // `malformed` below.
        }
    }
    return { lines, parsed, malformed: lines - parsed };
}
|
|
948
|
+
// Collect leftover proper-lockfile lock dirs (`<file>.lock`) from the state
// directories. withLock's stale window is 5s, so a lock older than the given
// threshold is almost certainly orphaned by a writer that crashed.
//
// olderThanMs - minimum age (by mtime) for a lock to be reported
// now         - current time in ms
// Returns an array of { path, ageMs }. Only direct children of each directory
// are examined.
async function scanStaleLocks(olderThanMs, now) {
    const stale = [];
    for (const dir of [ROOT, INBOX_DIR, CURSOR_DIR, ROOMS_DIR, TRANSPORT_DIR]) {
        if (!existsSync(dir)) {
            continue;
        }
        let entries;
        try {
            entries = await fsp.readdir(dir);
        }
        catch {
            // Directory could not be listed: skip it.
            continue;
        }
        const lockNames = entries.filter((entry) => entry.endsWith(".lock"));
        for (const lockName of lockNames) {
            const lockPath = path.join(dir, lockName);
            let info;
            try {
                info = await fsp.stat(lockPath);
            }
            catch {
                // Lock vanished mid-scan: nothing to report.
                continue;
            }
            const ageMs = now - info.mtimeMs;
            if (ageMs > olderThanMs) {
                stale.push({ path: lockPath, ageMs });
            }
        }
    }
    return stale;
}
|
|
981
|
+
// Argument schema matching the options doctorTool reads from `args`.
export const doctorSchema = {
    // Optional boolean; doctorTool treats an absent value as false.
    fix: z.boolean().optional(),
    // Optional positive integer; doctorTool falls back to 5 * 1024 * 1024.
    maxFileBytes: z.number().int().positive().optional(),
};
|
|
985
|
+
// Run every health check over the coordination state and report the results.
//
// args.fix          - when true, apply the automatic repair for each fixable
//                     problem as it is found (default false: report only).
// args.maxFileBytes - size above which a JSONL file is reported as oversized
//                     (default 5 * 1024 * 1024 bytes).
//
// Returns { ok, healthy, fixApplied, root, findings, fixed, summary }.
// `findings` holds one { check, level, detail, fixable, items } entry per
// check, always in the same order. A finding's level describes what was FOUND,
// so it stays "warn"/"error" even when fix repaired it in this run. `fixed`
// lists the repairs performed and is undefined unless fix was requested.
// `healthy` is true only when no finding is at "warn" or "error".
export async function doctorTool(args) {
    const fix = args.fix ?? false;
    const maxBytes = args.maxFileBytes ?? 5 * 1024 * 1024;
    const now = Date.now();
    const reg = await readJson(AGENTS_FILE, {});
    const rooms = await getRooms();
    // Per-run memo of scanJsonl results so each JSONL file is scanned once and
    // the cursor and malformed-line checks agree on the same counts.
    const counts = new Map();
    const ctx = {
        fix,
        maxBytes,
        now,
        reg,
        rooms,
        known: new Set(Object.keys(reg)),
        channels: Object.keys(rooms),
        findings: [],
        fixed: [],
        counts,
        countFor: async (file) => {
            if (!counts.has(file)) {
                counts.set(file, await scanJsonl(file));
            }
            return counts.get(file);
        },
    };
    // Order matters: it is the order of `findings`, and earlier fixes (e.g.
    // deleting orphan cursors) change what later checks see.
    await doctorCheckTransportMarkers(ctx);
    await doctorCheckRoomMemberships(ctx);
    await doctorCheckOrphanFiles(ctx);
    await doctorCheckCursorOffsets(ctx);
    await doctorCheckMalformedJsonl(ctx);
    await doctorCheckStaleAgents(ctx);
    await doctorCheckOversizedFiles(ctx);
    await doctorCheckStaleLocks(ctx);
    await doctorCheckChannelRegistry(ctx);
    doctorCheckEnvironment(ctx);
    const countLevel = (level) => ctx.findings.filter((f) => f.level === level).length;
    const summary = {
        ok: countLevel("ok"),
        warn: countLevel("warn"),
        error: countLevel("error"),
    };
    return {
        ok: true,
        healthy: summary.warn === 0 && summary.error === 0,
        fixApplied: fix,
        root: ROOT,
        findings: ctx.findings,
        fixed: fix ? ctx.fixed : undefined,
        summary,
    };
}
// The JSONL files inspected by the malformed-line and oversized-file checks:
// the default room file, the status file, every non-default channel file, and
// every inbox file currently present.
async function doctorJsonlFiles(channels) {
    return [
        ROOM_FILE,
        STATUS_FILE,
        ...channels.filter((c) => c !== DEFAULT_ROOM).map((c) => roomFile(c)),
        ...(await listInboxFiles()).map((f) => path.join(INBOX_DIR, f)),
    ];
}
// Check 1: orphan transport markers (dead local pid, or stale remote heartbeat).
async function doctorCheckTransportMarkers({ fix, reg, now, findings, fixed }) {
    const dead = [];
    for (const fname of await listTransportFiles()) {
        const file = path.join(TRANSPORT_DIR, fname);
        const marker = await readJson(file, null);
        if (marker && isMarkerLive(marker, reg, now)) {
            continue;
        }
        dead.push(file);
        if (fix) {
            await deleteFile(file);
            fixed.push(`deleted stale transport marker ${fname}`);
        }
    }
    findings.push({
        check: "orphan-transport-markers",
        level: dead.length ? "warn" : "ok",
        detail: dead.length ? `${dead.length} stale transport marker(s) (dead pid or expired remote heartbeat)` : "no stale transport markers",
        fixable: true,
        items: dead.length ? dead.map((f) => path.basename(f)) : undefined,
    });
}
// Check 2: orphan room memberships (member not in the registry).
async function doctorCheckRoomMemberships({ fix, rooms, known, findings, fixed }) {
    const orphans = new Set();
    for (const entry of Object.values(rooms)) {
        for (const member of entry.members ?? []) {
            if (!known.has(member)) {
                orphans.add(member);
            }
        }
    }
    if (fix && orphans.size) {
        await updateJson(ROOMS_FILE, {}, (cur) => {
            for (const entry of Object.values(cur)) {
                if (entry.members?.length) {
                    entry.members = entry.members.filter((m) => known.has(m));
                }
            }
            return cur;
        });
        fixed.push(`dropped ${orphans.size} orphan membership(s): ${[...orphans].join(", ")}`);
    }
    findings.push({
        check: "orphan-room-memberships",
        level: orphans.size ? "warn" : "ok",
        detail: orphans.size ? `${orphans.size} channel member(s) not in the registry` : "all channel members are registered",
        fixable: true,
        items: orphans.size ? [...orphans] : undefined,
    });
}
// Check 3: orphan inbox / cursor files (owner not registered).
async function doctorCheckOrphanFiles({ fix, known, findings, fixed }) {
    const orphanInbox = [];
    for (const fname of await listInboxFiles()) {
        const id = fname.replace(/\.jsonl$/, "");
        if (known.has(id)) {
            continue;
        }
        orphanInbox.push(id);
        if (fix) {
            await deleteFile(path.join(INBOX_DIR, fname));
            fixed.push(`deleted orphan inbox ${fname}`);
        }
    }
    const orphanCursor = [];
    for (const fname of await listCursorFiles()) {
        const id = fname.replace(/\.json$/, "");
        if (known.has(id)) {
            continue;
        }
        orphanCursor.push(id);
        if (fix) {
            await deleteFile(path.join(CURSOR_DIR, fname));
            fixed.push(`deleted orphan cursor ${fname}`);
        }
    }
    const total = orphanInbox.length + orphanCursor.length;
    findings.push({
        check: "orphan-inboxes-cursors",
        level: total ? "warn" : "ok",
        detail: total
            ? `${orphanInbox.length} inbox + ${orphanCursor.length} cursor file(s) for unregistered ids`
            : "no orphan inbox/cursor files",
        fixable: true,
        // An id can own both an orphan inbox and an orphan cursor: list it once.
        items: total ? [...new Set([...orphanInbox, ...orphanCursor])] : undefined,
    });
}
// Check 4: cursor offsets past end-of-file (would return [] forever).
async function doctorCheckCursorOffsets({ fix, findings, fixed, counts, countFor }) {
    const broken = [];
    for (const fname of await listCursorFiles()) {
        const id = fname.replace(/\.json$/, "");
        const cursorPath = path.join(CURSOR_DIR, fname);
        const cursor = await readJson(cursorPath, {});
        const overflow = [];
        const inboxMax = (await countFor(inboxFile(id))).parsed;
        if ((cursor.inboxOffset ?? 0) > inboxMax) {
            overflow.push(`inboxOffset ${cursor.inboxOffset}>${inboxMax}`);
        }
        const roomMax = (await countFor(ROOM_FILE)).parsed;
        if ((cursor.roomOffset ?? 0) > roomMax) {
            overflow.push(`roomOffset ${cursor.roomOffset}>${roomMax}`);
        }
        const statusMax = (await countFor(STATUS_FILE)).parsed;
        if ((cursor.statusOffset ?? 0) > statusMax) {
            overflow.push(`statusOffset ${cursor.statusOffset}>${statusMax}`);
        }
        for (const [chan, off] of Object.entries(cursor.roomOffsets ?? {})) {
            const max = (await countFor(roomFile(chan))).parsed;
            if (off > max) {
                overflow.push(`roomOffsets[${chan}] ${off}>${max}`);
            }
        }
        if (!overflow.length) {
            continue;
        }
        broken.push(`${id}: ${overflow.join(", ")}`);
        if (!fix) {
            continue;
        }
        // Re-apply the clamp to whatever updateJson hands back, which may
        // differ from the snapshot read above.
        await updateJson(cursorPath, {}, (c) => {
            if ((c.inboxOffset ?? 0) > inboxMax) {
                c.inboxOffset = inboxMax;
            }
            if ((c.roomOffset ?? 0) > roomMax) {
                c.roomOffset = roomMax;
            }
            if ((c.statusOffset ?? 0) > statusMax) {
                c.statusOffset = statusMax;
            }
            for (const chan of Object.keys(c.roomOffsets ?? {})) {
                const scanned = counts.get(roomFile(chan));
                // A channel that was not in the snapshot has never been
                // scanned in this run, so its real length is unknown here.
                // Clamping it against 0 would rewind that offset to the start
                // of the channel, so leave it for the next run to judge.
                if (!scanned) {
                    continue;
                }
                if (c.roomOffsets[chan] > scanned.parsed) {
                    c.roomOffsets[chan] = scanned.parsed;
                }
            }
            return c;
        });
        fixed.push(`clamped cursor offsets for ${id}`);
    }
    findings.push({
        check: "cursor-past-eof",
        level: broken.length ? "error" : "ok",
        detail: broken.length ? `${broken.length} cursor(s) with an offset past EOF — delivery stalled` : "all cursor offsets are within bounds",
        fixable: true,
        items: broken.length ? broken : undefined,
    });
}
// Check 5: malformed JSONL lines (silently desync offset math between server + hooks).
async function doctorCheckMalformedJsonl({ fix, channels, findings, fixed, countFor }) {
    const bad = [];
    for (const file of await doctorJsonlFiles(channels)) {
        const scan = await countFor(file);
        if (scan.malformed <= 0) {
            continue;
        }
        const base = path.basename(file);
        bad.push(`${base} (${scan.malformed})`);
        if (fix) {
            // Keep a copy of the original before rewriting it in place.
            await fsp.copyFile(file, file + ".bak");
            await rewriteJsonl(file, () => true); // drops unparseable lines
            fixed.push(`rewrote ${base} dropping ${scan.malformed} malformed line(s) (backup: ${base}.bak)`);
        }
    }
    findings.push({
        check: "malformed-jsonl",
        level: bad.length ? "warn" : "ok",
        detail: bad.length ? `${bad.length} file(s) contain unparseable lines` : "no malformed JSONL lines",
        fixable: true,
        items: bad.length ? bad : undefined,
    });
}
// Check 6: stale agents (registered, no live transport, heartbeat past
// EVICT_MS). Report only.
async function doctorCheckStaleAgents({ reg, now, findings }) {
    // Compute liveness WITHOUT deleting dead markers. loadLiveTransports
    // prunes as a side effect, which would make this read-only check mutate
    // state (and pre-empt the orphan-marker fix in check 1).
    const live = new Set();
    for (const fname of await listTransportFiles()) {
        const marker = await readJson(path.join(TRANSPORT_DIR, fname), null);
        if (marker && isMarkerLive(marker, reg, now)) {
            live.add(marker.agentId);
        }
    }
    const stale = [];
    for (const [id, agent] of Object.entries(reg)) {
        if (live.has(id)) {
            continue;
        }
        const silentMs = now - agent.lastHeartbeat;
        if (silentMs > EVICT_MS) {
            stale.push(`${id} (${Math.floor(silentMs / 3600000)}h)`);
        }
    }
    findings.push({
        check: "stale-agents",
        level: stale.length ? "warn" : "ok",
        detail: stale.length ? `${stale.length} agent(s) past the eviction window — next list_agents will drop them` : "no stale agents",
        fixable: false,
        items: stale.length ? stale : undefined,
    });
}
// Check 7: oversized JSONL files. Report only (suggest prune).
async function doctorCheckOversizedFiles({ channels, maxBytes, findings }) {
    const big = [];
    for (const file of await doctorJsonlFiles(channels)) {
        const sz = await fileSize(file);
        if (sz > maxBytes) {
            big.push(`${path.basename(file)} (${(sz / 1024 / 1024).toFixed(1)}MB)`);
        }
    }
    findings.push({
        check: "oversized-files",
        level: big.length ? "warn" : "ok",
        detail: big.length ? `${big.length} file(s) over ${(maxBytes / 1024 / 1024).toFixed(0)}MB — consider prune` : "no oversized files",
        fixable: false,
        items: big.length ? big : undefined,
    });
}
// Check 8: stale lock dirs from crashed writers.
async function doctorCheckStaleLocks({ fix, now, findings, fixed }) {
    const locks = await scanStaleLocks(60_000, now);
    if (fix) {
        for (const lock of locks) {
            try {
                await fsp.rm(lock.path, { recursive: true, force: true });
                fixed.push(`removed stale lock ${path.basename(lock.path)}`);
            }
            catch {
                // Best-effort removal: a lock that cannot be removed is still
                // listed in the finding, just not in `fixed`.
            }
        }
    }
    findings.push({
        check: "stale-locks",
        level: locks.length ? "warn" : "ok",
        detail: locks.length ? `${locks.length} lock dir(s) older than 60s — likely from a crashed writer` : "no stale locks",
        fixable: true,
        items: locks.length ? locks.map((l) => `${path.basename(l.path)} (${Math.floor(l.ageMs / 1000)}s)`) : undefined,
    });
}
// Check 9: channel/registry consistency, i.e. rooms/<chan>.jsonl files without
// a registry entry.
async function doctorCheckChannelRegistry({ fix, rooms, findings, fixed }) {
    const orphanFiles = [];
    let names = [];
    if (existsSync(ROOMS_DIR)) {
        try {
            names = await fsp.readdir(ROOMS_DIR);
        }
        catch {
            // Directory could not be listed: treat it as empty.
        }
    }
    for (const name of names) {
        if (!name.endsWith(".jsonl")) {
            continue;
        }
        const chan = name.replace(/\.jsonl$/, "");
        if (rooms[chan]) {
            continue;
        }
        orphanFiles.push(name);
        if (fix) {
            await ensureRoom(chan, "doctor");
            fixed.push(`registered channel '${chan}' (had a JSONL file but no registry entry)`);
        }
    }
    findings.push({
        check: "channel-registry-consistency",
        level: orphanFiles.length ? "warn" : "ok",
        detail: orphanFiles.length ? `${orphanFiles.length} channel file(s) with no registry entry` : "channel files and registry agree",
        fixable: true,
        items: orphanFiles.length ? orphanFiles : undefined,
    });
}
// Check 10: environment sanity. Report only.
function doctorCheckEnvironment({ findings }) {
    // `tmux -V` exiting 0 is taken as "tmux is usable"; a failed spawn leaves
    // status non-zero/null and stdout possibly null.
    const tmuxProbe = spawnSync("tmux", ["-V"]);
    const tmuxOk = tmuxProbe.status === 0;
    findings.push({
        check: "environment",
        level: tmuxOk ? "ok" : "warn",
        detail: tmuxOk
            ? `root=${ROOT}; node=${process.execPath}; tmux=${(tmuxProbe.stdout ?? "").toString().trim() || "present"}`
            : `root=${ROOT}; node=${process.execPath}; tmux NOT on PATH — the tmux-push transport will not work`,
        fixable: false,
        items: [`root=${ROOT}`, `execPath=${process.execPath}`, `inTmux=${!!process.env.TMUX_PANE}`],
    });
}
|
|
879
1297
|
// ---------- helpers ----------
|
|
880
1298
|
async function moveFile(from, to) {
|
|
881
1299
|
if (!existsSync(from))
|