sensorium-mcp 3.0.3 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/services/reconnect-snapshot.service.d.ts.map +1 -1
- package/dist/services/reconnect-snapshot.service.js +20 -3
- package/dist/services/reconnect-snapshot.service.js.map +1 -1
- package/dist/services/worker-cleanup.service.d.ts.map +1 -1
- package/dist/services/worker-cleanup.service.js +28 -5
- package/dist/services/worker-cleanup.service.js.map +1 -1
- package/package.json +1 -1
- package/supervisor/main.go +29 -15
- package/supervisor/updater.go +11 -11
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reconnect-snapshot.service.d.ts","sourceRoot":"","sources":["../../src/services/reconnect-snapshot.service.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;
|
|
1
|
+
{"version":3,"file":"reconnect-snapshot.service.d.ts","sourceRoot":"","sources":["../../src/services/reconnect-snapshot.service.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAuBH;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,IAAI,CAchE;AAED;;;;;;;;GAQG;AACH,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAgC9D;AAED;;;;GAIG;AACH,wBAAgB,sBAAsB,IAAI,IAAI,CAS7C"}
|
|
@@ -6,13 +6,19 @@
|
|
|
6
6
|
* The snapshot is valid for 10 minutes. After that it is either deleted
|
|
7
7
|
* by the auto-cleanup timer or ignored (too old) by isReconnectCandidate.
|
|
8
8
|
*/
|
|
9
|
-
import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
|
|
9
|
+
import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync, writeFileSync } from "node:fs";
|
|
10
10
|
import { homedir } from "node:os";
|
|
11
11
|
import { join } from "node:path";
|
|
12
12
|
import { log } from "../logger.js";
|
|
13
13
|
const DATA_DIR = join(homedir(), ".remote-copilot-mcp");
|
|
14
14
|
const SNAPSHOT_PATH = join(DATA_DIR, "active-sessions.json");
|
|
15
15
|
const SNAPSHOT_MAX_AGE_MS = 10 * 60 * 1000; // 10 minutes
|
|
16
|
+
/**
 * Write via temp file + rename to prevent partial reads from concurrent access.
 *
 * The payload is written to a PID-suffixed sibling of SNAPSHOT_PATH and then
 * renamed over SNAPSHOT_PATH. If either step throws, the temp file is removed
 * (best effort) so failed writes do not accumulate stale `*.tmp.<pid>` files,
 * and the original error is rethrown for the caller to handle.
 *
 * @param {string} data - Serialized snapshot content to persist.
 * @throws Rethrows whatever writeFileSync / renameSync threw.
 */
function atomicWriteSnapshot(data) {
    const tmp = `${SNAPSHOT_PATH}.tmp.${process.pid}`;
    try {
        writeFileSync(tmp, data, "utf-8");
        renameSync(tmp, SNAPSHOT_PATH);
    }
    catch (err) {
        try {
            unlinkSync(tmp);
        }
        catch {
            // The temp file may never have been created — nothing to clean up.
        }
        throw err;
    }
}
|
|
16
22
|
/**
|
|
17
23
|
* Write the set of active thread IDs to the reconnect snapshot file.
|
|
18
24
|
* Called just before the server process exits.
|
|
@@ -24,7 +30,7 @@ export function writeReconnectSnapshot(threadIds) {
|
|
|
24
30
|
threadIds,
|
|
25
31
|
timestamp: new Date().toISOString(),
|
|
26
32
|
};
|
|
27
|
-
|
|
33
|
+
atomicWriteSnapshot(JSON.stringify(snapshot, null, 2));
|
|
28
34
|
log.info(`[reconnect-snapshot] Wrote snapshot with ${threadIds.length} thread(s): ${threadIds.join(", ")}`);
|
|
29
35
|
}
|
|
30
36
|
catch (err) {
|
|
@@ -56,7 +62,18 @@ export function isReconnectCandidate(threadId) {
|
|
|
56
62
|
}
|
|
57
63
|
// Consume: remove this threadId so it can't match again
|
|
58
64
|
snapshot.threadIds = snapshot.threadIds.filter(id => id !== threadId);
|
|
59
|
-
|
|
65
|
+
try {
|
|
66
|
+
if (snapshot.threadIds.length === 0) {
|
|
67
|
+
unlinkSync(SNAPSHOT_PATH);
|
|
68
|
+
}
|
|
69
|
+
else {
|
|
70
|
+
atomicWriteSnapshot(JSON.stringify(snapshot, null, 2));
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
catch (writeErr) {
|
|
74
|
+
log.warn(`[reconnect-snapshot] Matched thread ${threadId} but failed to persist consume: ${writeErr}`);
|
|
75
|
+
// Still return true — the match was valid, and the 10-min TTL bounds the risk
|
|
76
|
+
}
|
|
60
77
|
log.info(`[reconnect-snapshot] Consumed reconnect slot for thread ${threadId}.`);
|
|
61
78
|
return true;
|
|
62
79
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reconnect-snapshot.service.js","sourceRoot":"","sources":["../../src/services/reconnect-snapshot.service.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,YAAY,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"reconnect-snapshot.service.js","sourceRoot":"","sources":["../../src/services/reconnect-snapshot.service.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,YAAY,EAAE,UAAU,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AACrG,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,GAAG,EAAE,MAAM,cAAc,CAAC;AAEnC,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,qBAAqB,CAAC,CAAC;AACxD,MAAM,aAAa,GAAG,IAAI,CAAC,QAAQ,EAAE,sBAAsB,CAAC,CAAC;AAC7D,MAAM,mBAAmB,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC,aAAa;AAEzD,oFAAoF;AACpF,SAAS,mBAAmB,CAAC,IAAY;IACvC,MAAM,GAAG,GAAG,GAAG,aAAa,QAAQ,OAAO,CAAC,GAAG,EAAE,CAAC;IAClD,aAAa,CAAC,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IAClC,UAAU,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;AACjC,CAAC;AAOD;;;GAGG;AACH,MAAM,UAAU,sBAAsB,CAAC,SAAmB;IACxD,IAAI,CAAC;QACH,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAsB;YAClC,SAAS;YACT,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC;QACF,mBAAmB,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QACvD,GAAG,CAAC,IAAI,CACN,4CAA4C,SAAS,CAAC,MAAM,eAAe,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAClG,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,IAAI,CAAC,kDAAkD,GAAG,EAAE,CAAC,CAAC;IACpE,CAAC;AACH,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,oBAAoB,CAAC,QAAgB;IACnD,IAAI,CAAC;QACH,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;YAAE,OAAO,KAAK,CAAC;QAC7C,MAAM,GAAG,GAAG,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QACjD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAsB,CAAC;QACtD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,OAAO,EAAE,CAAC;QAChE,IAAI,GAAG,GAAG,mBAAmB,EAAE,CAAC;YAC9B,GAAG,CAAC,IAAI,CACN,0CAA0C,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,gBAAgB,CACjF,CAAC;YACF,OAAO,KAAK,CAAC;QACf,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;YACjF,OAAO,KAAK,CAAC;QACf,CAAC;QACD,wDAAwD;QACxD,QAAQ,CAAC,SAAS,GAAG,QAAQ,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,K
AAK,QAAQ,CAAC,CAAC;QACtE,IAAI,CAAC;YACH,IAAI,QAAQ,CAAC,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACpC,UAAU,CAAC,aAAa,CAAC,CAAC;YAC5B,CAAC;iBAAM,CAAC;gBACN,mBAAmB,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YACzD,CAAC;QACH,CAAC;QAAC,OAAO,QAAQ,EAAE,CAAC;YAClB,GAAG,CAAC,IAAI,CAAC,uCAAuC,QAAQ,mCAAmC,QAAQ,EAAE,CAAC,CAAC;YACvG,8EAA8E;QAChF,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,2DAA2D,QAAQ,GAAG,CAAC,CAAC;QACjF,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,sBAAsB;IACpC,IAAI,CAAC;QACH,IAAI,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;YAC9B,UAAU,CAAC,aAAa,CAAC,CAAC;YAC1B,GAAG,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,IAAI,CAAC,kDAAkD,GAAG,EAAE,CAAC,CAAC;IACpE,CAAC;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"worker-cleanup.service.d.ts","sourceRoot":"","sources":["../../src/services/worker-cleanup.service.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"worker-cleanup.service.d.ts","sourceRoot":"","sources":["../../src/services/worker-cleanup.service.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AAsB5E,wBAAsB,qBAAqB,CACzC,EAAE,EAAE,UAAU,CAAC,cAAc,cAAc,EAAE,YAAY,CAAC,EAC1D,QAAQ,EAAE;IAAE,gBAAgB,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;CAAE,EAC/E,MAAM,EAAE,MAAM,EACd,eAAe,EAAE,sBAAsB,EACvC,KAAK,GAAE,MAA8B,GACpC,OAAO,CAAC;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAAC,CA+DhD"}
|
|
@@ -1,9 +1,32 @@
|
|
|
1
|
+
import { execSync } from "node:child_process";
|
|
2
|
+
import { unlinkSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
1
4
|
import { archiveNotesForThread } from "../data/memory/semantic.js";
|
|
2
5
|
import { getExplicitTelegramTopicId } from "../data/memory/thread-registry.js";
|
|
3
6
|
import { synthesizeGhostMemory } from "../memory.js";
|
|
4
7
|
import { errorMessage } from "../utils.js";
|
|
5
|
-
import { spawnedThreads } from "./process.service.js";
|
|
8
|
+
import { spawnedThreads, readPidFiles, PROCESS_PIDS_DIR } from "./process.service.js";
|
|
6
9
|
import { log } from "../logger.js";
|
|
10
|
+
/**
 * Terminate the process recorded for a thread and remove the thread's PID file.
 *
 * On Windows this runs `taskkill /F /T` against the PID; on other platforms it
 * sends SIGTERM to the PID itself. Kill failures are logged at debug level and
 * otherwise ignored because the process may already be dead. The PID file
 * `<PROCESS_PIDS_DIR>/<threadId>.pid` is removed afterwards regardless of
 * whether the kill succeeded.
 *
 * @param {number} pid - PID of the process to terminate.
 * @param {number|string} threadId - Thread identifier; used for logging and
 *   to locate the PID file.
 */
function killProcessTree(pid, threadId) {
    const targetPid = Number(pid);
    // Only positive integer PIDs are safe: process.kill() treats 0 and negative
    // values as process-group targets (which can include this very process),
    // and the value is interpolated into a shell command on Windows.
    if (!Number.isInteger(targetPid) || targetPid <= 0) {
        log.debug(`[worker-cleanup] Skipping kill for thread ${threadId}: invalid PID ${pid}`);
    }
    else {
        try {
            if (process.platform === "win32") {
                execSync(`taskkill /F /T /PID ${targetPid}`, { timeout: 10000 });
            }
            else {
                process.kill(targetPid, "SIGTERM");
            }
            log.info(`[worker-cleanup] Killed process tree for thread ${threadId} PID=${targetPid}`);
        }
        catch (err) {
            // Process may already be dead — ignore
            log.debug(`[worker-cleanup] Kill process ${targetPid} (thread ${threadId}): ${errorMessage(err)}`);
        }
    }
    const pidFile = join(PROCESS_PIDS_DIR, `${threadId}.pid`);
    try {
        unlinkSync(pidFile);
    }
    catch {
        // PID file may not exist — removal is best effort.
    }
}
|
|
7
30
|
const DEFAULT_WORKER_TTL_MS = 60 * 60 * 1000;
|
|
8
31
|
let orphanSweepDone = false;
|
|
9
32
|
export async function cleanupExpiredWorkers(db, telegram, chatId, threadLifecycle, ttlMs = DEFAULT_WORKER_TTL_MS) {
|
|
@@ -26,6 +49,9 @@ export async function cleanupExpiredWorkers(db, telegram, chatId, threadLifecycl
|
|
|
26
49
|
if (spawnedThreads.some((t) => t.threadId === row.thread_id))
|
|
27
50
|
continue;
|
|
28
51
|
try {
|
|
52
|
+
const pidEntry = readPidFiles().find((e) => e.threadId === row.thread_id);
|
|
53
|
+
if (pidEntry)
|
|
54
|
+
killProcessTree(pidEntry.pid, row.thread_id);
|
|
29
55
|
try {
|
|
30
56
|
// For workers, thread_id IS the Telegram topic ID (created via createManagedTopic).
|
|
31
57
|
// Use explicit telegram_topic_id if set, otherwise fall back to thread_id.
|
|
@@ -81,10 +107,7 @@ async function cleanupSingleWorker(thread, db, telegram, chatId, threadLifecycle
|
|
|
81
107
|
}
|
|
82
108
|
catch { }
|
|
83
109
|
}
|
|
84
|
-
|
|
85
|
-
process.kill(thread.pid, "SIGTERM");
|
|
86
|
-
}
|
|
87
|
-
catch { }
|
|
110
|
+
killProcessTree(thread.pid, thread.threadId);
|
|
88
111
|
try {
|
|
89
112
|
const topicId = getExplicitTelegramTopicId(db, thread.threadId) ?? thread.threadId;
|
|
90
113
|
await telegram.deleteForumTopic(chatId, topicId);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"worker-cleanup.service.js","sourceRoot":"","sources":["../../src/services/worker-cleanup.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AACnE,OAAO,EAAE,0BAA0B,EAAE,MAAM,mCAAmC,CAAC;AAC/E,OAAO,EAAE,qBAAqB,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,cAAc,EAAsB,MAAM,sBAAsB,CAAC;
|
|
1
|
+
{"version":3,"file":"worker-cleanup.service.js","sourceRoot":"","sources":["../../src/services/worker-cleanup.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AACnE,OAAO,EAAE,0BAA0B,EAAE,MAAM,mCAAmC,CAAC;AAC/E,OAAO,EAAE,qBAAqB,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,cAAc,EAAsB,YAAY,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAE1G,OAAO,EAAE,GAAG,EAAE,MAAM,cAAc,CAAC;AAEnC,SAAS,eAAe,CAAC,GAAW,EAAE,QAAgB;IACpD,IAAI,CAAC;QACH,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;YACjC,QAAQ,CAAC,uBAAuB,GAAG,EAAE,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC7D,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QAC/B,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,mDAAmD,QAAQ,QAAQ,GAAG,EAAE,CAAC,CAAC;IACrF,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,uCAAuC;QACvC,GAAG,CAAC,KAAK,CAAC,iCAAiC,GAAG,YAAY,QAAQ,MAAM,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAC/F,CAAC;IACD,MAAM,OAAO,GAAG,IAAI,CAAC,gBAAgB,EAAE,GAAG,QAAQ,MAAM,CAAC,CAAC;IAC1D,IAAI,CAAC;QAAC,UAAU,CAAC,OAAO,CAAC,CAAC;IAAC,CAAC;IAAC,MAAM,CAAC,CAAA,CAAC;AACvC,CAAC;AAED,MAAM,qBAAqB,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;AAC7C,IAAI,eAAe,GAAG,KAAK,CAAC;AAE5B,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,EAA0D,EAC1D,QAA+E,EAC/E,MAAc,EACd,eAAuC,EACvC,QAAgB,qBAAqB;IAErC,MAAM,MAAM,GAAG,EAAE,OAAO,EAAE,CAAC,EAAE,MAAM,EAAE,EAAc,EAAE,CAAC;IACtD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,KAAK,MAAM,MAAM,IAAI,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,QAAQ,IAAI,GAAG,GAAG,CAAC,CAAC,SAAS,GAAG,KAAK,CAAC,EAAE,CAAC;QAC1G,IAAI,CAAC;YACH,MAAM,mBAAmB,CAAC,MAAM,EAAE,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,eAAe,CAAC,CAAC;YACzE,MAAM,CAAC,OAAO,EAAE,CAAC;QACnB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,MAAM,CAAC,QAAQ,KAAK,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACxE,CAAC;IACH,CAAC;IACD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,IAAI,CAAC,GAAG,GAAG,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;QACnD,MAAM,SA
AS,GAAG,EAAE,CAAC,OAAO,CAC1B;6GACuG,CACxG,CAAC,GAAG,CAAC,MAAM,CAA8D,CAAC;QAC3E,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC5B,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,GAAG,CAAC,SAAS,CAAC;gBAAE,SAAS;YACvE,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,GAAG,CAAC,SAAS,CAAC,CAAC;gBAC1E,IAAI,QAAQ;oBAAE,eAAe,CAAC,QAAQ,CAAC,GAAG,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC;gBAC3D,IAAI,CAAC;oBACH,oFAAoF;oBACpF,2EAA2E;oBAC3E,MAAM,OAAO,GAAG,GAAG,CAAC,iBAAiB,IAAI,GAAG,CAAC,SAAS,CAAC;oBACvD,MAAM,QAAQ,CAAC,gBAAgB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;gBACnD,CAAC;gBAAC,MAAM,CAAC,CAAA,CAAC;gBACV,eAAe,CAAC,aAAa,CAAC,EAAE,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC;gBACjD,IAAI,CAAC;oBAAC,qBAAqB,CAAC,EAAE,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC;gBAAC,CAAC;gBAAC,MAAM,CAAC,CAAA,CAAC;gBAC1D,MAAM,CAAC,OAAO,EAAE,CAAC;YACnB,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,GAAG,GAAG,UAAU,GAAG,CAAC,SAAS,KAAK,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC5D,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACxB,GAAG,CAAC,IAAI,CAAC,uDAAuD,GAAG,EAAE,CAAC,CAAC;YACzE,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,IAAI,CAAC,sDAAsD,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACtF,CAAC;IACD,6EAA6E;IAC7E,oEAAoE;IACpE,IAAI,CAAC,eAAe;QAAE,IAAI,CAAC;YACzB,eAAe,GAAG,IAAI,CAAC;YACvB,MAAM,UAAU,GAAG,EAAE,CAAC,OAAO,CAC3B;qDAC+C,CAChD,CAAC,GAAG,EAA+D,CAAC;YACrE,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;gBAC7B,MAAM,OAAO,GAAG,GAAG,CAAC,iBAAiB,IAAI,GAAG,CAAC,SAAS,CAAC;gBACvD,IAAI,CAAC;oBACH,MAAM,QAAQ,CAAC,gBAAgB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;oBACjD,MAAM,CAAC,OAAO,EAAE,CAAC;oBACjB,GAAG,CAAC,IAAI,CAAC,yCAAyC,OAAO,wBAAwB,GAAG,CAAC,SAAS,EAAE,CAAC,CAAC;gBACpG,CAAC;gBAAC,MAAM,CAAC;oBACP,4CAA4C;gBAC9C,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,IAAI,CAAC,mDAAmD,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACnF,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,KAAK,UAAU,mBAAmB,CAChC,MAAqB,EACrB,EAA0D,EAC1D,QAA+E,EAC/E,MAAc,EACd,eAAuC;IAEvC,IAAI,MAAM,CAAC,oBAAoB,KAAK,SAAS,EAAE,CAAC;QAC9C,IAAI,CAAC;YAAC,MAAM,
qBAAqB,CAAC,EAAE,EAAE,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,oBAAoB,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;QAAC,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;IAC9G,CAAC;IACD,eAAe,CAAC,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC;IAC7C,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,0BAA0B,CAAC,EAAE,EAAE,MAAM,CAAC,QAAQ,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC;QACnF,MAAM,QAAQ,CAAC,gBAAgB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnD,CAAC;IAAC,MAAM,CAAC,CAAA,CAAC;IACV,IAAI,CAAC;QAAC,eAAe,CAAC,aAAa,CAAC,EAAE,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC;IAAC,CAAC;IAAC,MAAM,CAAC,CAAA,CAAC;IACpE,IAAI,CAAC;QAAC,qBAAqB,CAAC,EAAE,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC;IAAC,CAAC;IAAC,MAAM,CAAC,CAAA,CAAC;IAC5D,MAAM,GAAG,GAAG,cAAc,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAC3C,IAAI,GAAG,KAAK,CAAC,CAAC;QAAE,cAAc,CAAC,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;AAChD,CAAC"}
|
package/package.json
CHANGED
package/supervisor/main.go
CHANGED
|
@@ -166,23 +166,37 @@ func runSupervisor(runningAsService bool) error {
|
|
|
166
166
|
mcp := NewMCPClient(cfg.MCPHttpPort, cfg.MCPHttpSecret)
|
|
167
167
|
mcp.Log = log
|
|
168
168
|
|
|
169
|
-
//
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
169
|
+
// Check if MCP server is already running and healthy — inherit it instead of
|
|
170
|
+
// killing and restarting (allows transparent supervisor binary updates).
|
|
171
|
+
inherited := false
|
|
172
|
+
if oldPid, pidErr := ReadPIDFile(cfg.Paths.ServerPID); pidErr == nil && oldPid > 0 && IsProcessAlive(oldPid) {
|
|
173
|
+
if mcp.IsServerReady(context.Background()) {
|
|
174
|
+
log.Info("Inherited running MCP server (PID %d) — skipping full restart", oldPid)
|
|
175
|
+
inherited = true
|
|
176
|
+
} else {
|
|
177
|
+
log.Info("MCP server process (PID %d) did not pass health check — proceeding with full restart", oldPid)
|
|
178
|
+
}
|
|
177
179
|
}
|
|
178
|
-
_ = os.Remove(cfg.Paths.ServerPID)
|
|
179
|
-
KillByPort(cfg.MCPHttpPort, log)
|
|
180
180
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
181
|
+
if !inherited {
|
|
182
|
+
// Kill orphan thread processes from previous runs, then clean PID files
|
|
183
|
+
KillOrphanThreads(cfg.Paths.PIDsDir, log)
|
|
184
|
+
|
|
185
|
+
// Kill orphan MCP server from previous run
|
|
186
|
+
if oldPid, pidErr := ReadPIDFile(cfg.Paths.ServerPID); pidErr == nil && oldPid > 0 && IsProcessAlive(oldPid) {
|
|
187
|
+
log.Info("Killing orphan MCP server (PID %d) from previous run", oldPid)
|
|
188
|
+
_ = KillProcess(oldPid, log)
|
|
189
|
+
time.Sleep(1 * time.Second) // allow port to release
|
|
190
|
+
}
|
|
191
|
+
_ = os.Remove(cfg.Paths.ServerPID)
|
|
192
|
+
KillByPort(cfg.MCPHttpPort, log)
|
|
193
|
+
|
|
194
|
+
// Spawn MCP server
|
|
195
|
+
_, err = SpawnMCPServer(cfg, log)
|
|
196
|
+
if err != nil {
|
|
197
|
+
log.Error("Failed to start MCP server: %v", err)
|
|
198
|
+
return fmt.Errorf("failed to start MCP server: %w", err)
|
|
199
|
+
}
|
|
186
200
|
}
|
|
187
201
|
|
|
188
202
|
// Wait for server to be ready
|
package/supervisor/updater.go
CHANGED
|
@@ -187,7 +187,7 @@ func (u *Updater) checkAndUpdate(ctx context.Context) {
|
|
|
187
187
|
u.state.Transition(updateScopeMCP, updatePhaseFailed, remote, local, err.Error())
|
|
188
188
|
}
|
|
189
189
|
|
|
190
|
-
notifyUpdaterOperator(u.cfg, u.log, fmt.Sprintf("⚙️ Supervisor: updating
|
|
190
|
+
notifyUpdaterOperator(u.cfg, u.log, fmt.Sprintf("⚙️ Supervisor: updating MCP server v%s → v%s. Grace period %v. MCP server will restart — threads will reconnect automatically.", local, remote, u.cfg.GracePeriod), 0)
|
|
191
191
|
|
|
192
192
|
// Grace period
|
|
193
193
|
u.log.Info("Grace period %v...", u.cfg.GracePeriod)
|
|
@@ -246,7 +246,7 @@ func (u *Updater) checkAndUpdate(ctx context.Context) {
|
|
|
246
246
|
if err != nil {
|
|
247
247
|
u.log.Error("All spawn attempts failed — server is down!")
|
|
248
248
|
markFailed(err)
|
|
249
|
-
notifyUpdaterOperator(u.cfg, u.log, "🔴 Supervisor: update FAILED — server is down! Manual intervention required.", 0)
|
|
249
|
+
notifyUpdaterOperator(u.cfg, u.log, "🔴 Supervisor: MCP server update FAILED — server is down! Manual intervention required.", 0)
|
|
250
250
|
return
|
|
251
251
|
}
|
|
252
252
|
|
|
@@ -257,7 +257,7 @@ func (u *Updater) checkAndUpdate(ctx context.Context) {
|
|
|
257
257
|
u.setLocalVersion(remote)
|
|
258
258
|
u.state.Transition(updateScopeMCP, updatePhaseIdle, remote, local, "")
|
|
259
259
|
|
|
260
|
-
notifyUpdaterOperator(u.cfg, u.log, fmt.Sprintf("✅ Supervisor:
|
|
260
|
+
notifyUpdaterOperator(u.cfg, u.log, fmt.Sprintf("✅ Supervisor: MCP server updated to v%s. Server ready — threads reconnected.", remote), 0)
|
|
261
261
|
u.log.Info("Update complete: v%s → v%s", local, remote)
|
|
262
262
|
|
|
263
263
|
// Reset start time for min uptime tracking
|
|
@@ -274,7 +274,7 @@ func (u *Updater) verifyUpdatedMCPServerReady(ctx context.Context, remote, local
|
|
|
274
274
|
errMsg := fmt.Sprintf("updated MCP server did not become ready within %v after restart (pid=%d)", mcpUpdateReadyTimeout, pid)
|
|
275
275
|
u.log.Error(errMsg)
|
|
276
276
|
u.state.Transition(updateScopeMCP, updatePhaseFailed, remote, local, errMsg)
|
|
277
|
-
notifyUpdaterOperator(u.cfg, u.log, fmt.Sprintf("🔴 Supervisor: update to v%s FAILED verification. Server did not become ready after restart.", remote), 0)
|
|
277
|
+
notifyUpdaterOperator(u.cfg, u.log, fmt.Sprintf("🔴 Supervisor: MCP server update to v%s FAILED verification. Server did not become ready after restart.", remote), 0)
|
|
278
278
|
return false
|
|
279
279
|
}
|
|
280
280
|
|
|
@@ -407,7 +407,7 @@ func (u *Updater) checkSupervisorUpdate(ctx context.Context) {
|
|
|
407
407
|
u.state.Transition(updateScopeSupervisor, updatePhaseFailed, remote, local, err.Error())
|
|
408
408
|
}
|
|
409
409
|
|
|
410
|
-
notifyUpdaterOperator(u.cfg, u.log, fmt.Sprintf("⚙️ Supervisor: updating binary %s → %s. Grace period %v
|
|
410
|
+
notifyUpdaterOperator(u.cfg, u.log, fmt.Sprintf("⚙️ Supervisor: updating supervisor binary %s → %s. Grace period %v. Supervisor process will restart — MCP server unaffected.", local, remote, u.cfg.GracePeriod), 0)
|
|
411
411
|
|
|
412
412
|
select {
|
|
413
413
|
case <-ctx.Done():
|
|
@@ -419,7 +419,7 @@ func (u *Updater) checkSupervisorUpdate(ctx context.Context) {
|
|
|
419
419
|
if err := u.downloadSupervisorBinary(ctx, downloadURL); err != nil {
|
|
420
420
|
markFailed(err)
|
|
421
421
|
u.log.Error("Supervisor binary download failed: %v", err)
|
|
422
|
-
notifyUpdaterOperator(u.cfg, u.log, fmt.Sprintf("🔴 Supervisor: binary update to %s failed during download.", remote), 0)
|
|
422
|
+
notifyUpdaterOperator(u.cfg, u.log, fmt.Sprintf("🔴 Supervisor: supervisor binary update to %s failed during download.", remote), 0)
|
|
423
423
|
return
|
|
424
424
|
}
|
|
425
425
|
|
|
@@ -427,11 +427,11 @@ func (u *Updater) checkSupervisorUpdate(ctx context.Context) {
|
|
|
427
427
|
_ = os.Remove(u.cfg.Paths.PendingBinary)
|
|
428
428
|
markFailed(err)
|
|
429
429
|
u.log.Error("Failed to stage supervisor version %s: %v", remote, err)
|
|
430
|
-
notifyUpdaterOperator(u.cfg, u.log, fmt.Sprintf("🔴 Supervisor: binary update to %s failed during staging.", remote), 0)
|
|
430
|
+
notifyUpdaterOperator(u.cfg, u.log, fmt.Sprintf("🔴 Supervisor: supervisor binary update to %s failed during staging.", remote), 0)
|
|
431
431
|
return
|
|
432
432
|
}
|
|
433
433
|
u.state.Transition(updateScopeSupervisor, updatePhaseStaged, remote, local, "")
|
|
434
|
-
notifyUpdaterOperator(u.cfg, u.log, fmt.Sprintf("⚙️ Supervisor:
|
|
434
|
+
notifyUpdaterOperator(u.cfg, u.log, fmt.Sprintf("⚙️ Supervisor: supervisor binary %s downloaded. Restarting supervisor to apply update — MCP server will continue running.", remote), 0)
|
|
435
435
|
|
|
436
436
|
// Reset start time so minimum uptime is re-enforced after restart
|
|
437
437
|
u.startAt = time.Now()
|
|
@@ -440,7 +440,7 @@ func (u *Updater) checkSupervisorUpdate(ctx context.Context) {
|
|
|
440
440
|
if err != nil {
|
|
441
441
|
markFailed(err)
|
|
442
442
|
u.log.Error("Failed to detect service mode for restart: %v", err)
|
|
443
|
-
notifyUpdaterOperator(u.cfg, u.log, "🔴 Supervisor:
|
|
443
|
+
notifyUpdaterOperator(u.cfg, u.log, "🔴 Supervisor: supervisor binary downloaded but service detection failed.", 0)
|
|
444
444
|
return
|
|
445
445
|
}
|
|
446
446
|
u.state.Transition(updateScopeSupervisor, updatePhaseRestarting, remote, local, "")
|
|
@@ -449,7 +449,7 @@ func (u *Updater) checkSupervisorUpdate(ctx context.Context) {
|
|
|
449
449
|
if err := scheduleServiceRestartForUpdate(u.log); err != nil {
|
|
450
450
|
markFailed(err)
|
|
451
451
|
u.log.Error("Failed to schedule service restart: %v", err)
|
|
452
|
-
notifyUpdaterOperator(u.cfg, u.log, "🔴 Supervisor:
|
|
452
|
+
notifyUpdaterOperator(u.cfg, u.log, "🔴 Supervisor: supervisor binary downloaded but service restart scheduling failed.", 0)
|
|
453
453
|
}
|
|
454
454
|
return
|
|
455
455
|
}
|
|
@@ -457,7 +457,7 @@ func (u *Updater) checkSupervisorUpdate(ctx context.Context) {
|
|
|
457
457
|
if err := requestSupervisorRestart(u.log); err != nil {
|
|
458
458
|
markFailed(err)
|
|
459
459
|
u.log.Error("Failed to signal supervisor for restart: %v", err)
|
|
460
|
-
notifyUpdaterOperator(u.cfg, u.log, "🔴 Supervisor:
|
|
460
|
+
notifyUpdaterOperator(u.cfg, u.log, "🔴 Supervisor: supervisor binary downloaded but restart signal failed.", 0)
|
|
461
461
|
}
|
|
462
462
|
}
|
|
463
463
|
|