neohive 6.1.1 → 6.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +16 -0
- package/dashboard.js +16 -10
- package/lib/agents.js +9 -3
- package/package.json +1 -1
- package/server.js +10 -17
package/cli.js
CHANGED
|
@@ -718,6 +718,22 @@ function reset() {
|
|
|
718
718
|
console.log(' [warn] Could not archive: ' + e.message + ' — proceeding with reset anyway.');
|
|
719
719
|
}
|
|
720
720
|
|
|
721
|
+
// Kill any running MCP server processes before wiping data.
|
|
722
|
+
// Otherwise orphaned heartbeat intervals keep writing into the fresh directory.
|
|
723
|
+
try {
|
|
724
|
+
const agentsFile = path.join(targetDir, 'agents.json');
|
|
725
|
+
if (fs.existsSync(agentsFile)) {
|
|
726
|
+
const agents = JSON.parse(fs.readFileSync(agentsFile, 'utf8'));
|
|
727
|
+
let killed = 0;
|
|
728
|
+
for (const [name, info] of Object.entries(agents)) {
|
|
729
|
+
if (info.pid) {
|
|
730
|
+
try { process.kill(info.pid, 'SIGTERM'); killed++; } catch {}
|
|
731
|
+
}
|
|
732
|
+
}
|
|
733
|
+
if (killed > 0) console.log(' [ok] Terminated ' + killed + ' running agent process(es)');
|
|
734
|
+
}
|
|
735
|
+
} catch {}
|
|
736
|
+
|
|
721
737
|
fs.rmSync(targetDir, { recursive: true, force: true });
|
|
722
738
|
fs.mkdirSync(targetDir, { recursive: true });
|
|
723
739
|
console.log(' Cleared all data from ' + targetDir);
|
package/dashboard.js
CHANGED
|
@@ -313,22 +313,23 @@ function readJson(file) {
|
|
|
313
313
|
}
|
|
314
314
|
|
|
315
315
|
function isPidAlive(pid, lastActivity) {
|
|
316
|
-
const STALE_THRESHOLD = 30000; // 30s — 3x heartbeat interval
|
|
316
|
+
const STALE_THRESHOLD = 30000; // 30s — 3x heartbeat interval
|
|
317
|
+
const PID_TRUST_WINDOW = 60000; // 60s — beyond this, PID check is unreliable (OS reuses PIDs)
|
|
317
318
|
|
|
318
|
-
// PRIORITY 1: Trust heartbeat freshness over PID status
|
|
319
|
-
// Heartbeats are written by the actual running process — if fresh, agent is alive
|
|
320
|
-
// regardless of whether process.kill can see the PID
|
|
321
319
|
if (lastActivity) {
|
|
322
320
|
const stale = Date.now() - new Date(lastActivity).getTime();
|
|
323
321
|
if (stale < STALE_THRESHOLD) return true;
|
|
322
|
+
// A real neohive agent writes heartbeat every 10s. If 60s have passed
|
|
323
|
+
// without one, the PID belongs to a different process (OS recycled it).
|
|
324
|
+
if (stale > PID_TRUST_WINDOW) return false;
|
|
324
325
|
}
|
|
325
326
|
|
|
326
|
-
//
|
|
327
|
+
// Heartbeat is stale but within the trust window — verify PID as fallback
|
|
327
328
|
try {
|
|
328
329
|
process.kill(pid, 0);
|
|
329
|
-
return true;
|
|
330
|
+
return true;
|
|
330
331
|
} catch {
|
|
331
|
-
return false;
|
|
332
|
+
return false;
|
|
332
333
|
}
|
|
333
334
|
}
|
|
334
335
|
|
|
@@ -3967,12 +3968,17 @@ function startFileWatcher() {
|
|
|
3967
3968
|
|
|
3968
3969
|
startFileWatcher();
|
|
3969
3970
|
|
|
3970
|
-
// macOS fs.watch()
|
|
3971
|
-
//
|
|
3971
|
+
// macOS fs.watch() silently stops emitting events when the watched directory is
|
|
3972
|
+
// deleted and recreated (e.g. reset --force). The watcher object stays non-null
|
|
3973
|
+
// but is dead. Force-restart it every 30s to guarantee the dashboard stays live.
|
|
3974
|
+
let _lastWatcherRestart = Date.now();
|
|
3972
3975
|
setInterval(() => {
|
|
3973
3976
|
const dataDir = resolveDataDir();
|
|
3974
3977
|
if (!fs.existsSync(dataDir)) return;
|
|
3975
|
-
if (!fsWatcher
|
|
3978
|
+
if (!fsWatcher || Date.now() - _lastWatcherRestart > 30000) {
|
|
3979
|
+
startFileWatcher();
|
|
3980
|
+
_lastWatcherRestart = Date.now();
|
|
3981
|
+
}
|
|
3976
3982
|
}, 5000).unref();
|
|
3977
3983
|
|
|
3978
3984
|
server.on('error', (err) => {
|
package/lib/agents.js
CHANGED
|
@@ -12,6 +12,7 @@ const _pidAliveCache = {};
|
|
|
12
12
|
let _isAutonomousMode = () => false;
|
|
13
13
|
function setAutonomousModeCheck(fn) { _isAutonomousMode = fn; }
|
|
14
14
|
|
|
15
|
+
const PID_TRUST_WINDOW_MS = 60000;
|
|
15
16
|
function isPidAlive(pid, lastActivity) {
|
|
16
17
|
const cacheKey = `${pid}_${lastActivity}`;
|
|
17
18
|
const cached = _pidAliveCache[cacheKey];
|
|
@@ -22,9 +23,14 @@ function isPidAlive(pid, lastActivity) {
|
|
|
22
23
|
|
|
23
24
|
if (lastActivity) {
|
|
24
25
|
const stale = Date.now() - new Date(lastActivity).getTime();
|
|
25
|
-
if (stale < STALE_THRESHOLD)
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
if (stale < STALE_THRESHOLD) {
|
|
27
|
+
alive = true;
|
|
28
|
+
} else if (stale > PID_TRUST_WINDOW_MS) {
|
|
29
|
+
alive = false;
|
|
30
|
+
} else {
|
|
31
|
+
try { process.kill(pid, 0); alive = true; } catch { alive = false; }
|
|
32
|
+
}
|
|
33
|
+
} else {
|
|
28
34
|
try { process.kill(pid, 0); alive = true; } catch { alive = false; }
|
|
29
35
|
}
|
|
30
36
|
_pidAliveCache[cacheKey] = { alive, ts: Date.now() };
|
package/package.json
CHANGED
package/server.js
CHANGED
|
@@ -454,40 +454,33 @@ function getAcks() {
|
|
|
454
454
|
}
|
|
455
455
|
}
|
|
456
456
|
|
|
457
|
-
// Cache for isPidAlive results — avoids redundant process.kill calls at 100-agent scale
|
|
458
457
|
const _pidAliveCache = {};
|
|
458
|
+
const PID_TRUST_WINDOW_MS = 60000; // Beyond 60s without heartbeat, PID is unreliable (OS reuses PIDs)
|
|
459
459
|
function isPidAlive(pid, lastActivity) {
|
|
460
|
-
// Cache with 5s TTL — PID status doesn't change faster than heartbeats
|
|
461
460
|
const cacheKey = `${pid}_${lastActivity}`;
|
|
462
461
|
const cached = _pidAliveCache[cacheKey];
|
|
463
462
|
if (cached && Date.now() - cached.ts < SERVER_CONFIG.AGENT_CACHE_TTL_MS) return cached.alive;
|
|
464
463
|
|
|
465
|
-
// 30s stale threshold — 3x the 10s heartbeat interval, catches dead agents faster
|
|
466
464
|
const STALE_THRESHOLD = SERVER_CONFIG.AGENT_STALE_THRESHOLD_MS;
|
|
467
465
|
let alive = false;
|
|
468
466
|
|
|
469
|
-
// PRIORITY 1: Trust heartbeat freshness over PID status
|
|
470
|
-
// Heartbeat files are written by the actual running process — if fresh, agent is alive
|
|
471
|
-
// regardless of whether process.kill can see the PID (cross-process PID visibility issues)
|
|
472
467
|
if (lastActivity) {
|
|
473
468
|
const stale = Date.now() - new Date(lastActivity).getTime();
|
|
474
469
|
if (stale < STALE_THRESHOLD) {
|
|
475
470
|
alive = true;
|
|
476
|
-
}
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
// PRIORITY 2: If heartbeat is stale, verify PID is actually dead
|
|
480
|
-
if (!alive) {
|
|
481
|
-
try {
|
|
482
|
-
process.kill(pid, 0);
|
|
483
|
-
alive = true; // PID exists — agent is alive even with stale heartbeat
|
|
484
|
-
} catch {
|
|
485
|
-
// PID dead AND heartbeat stale — agent is truly dead
|
|
471
|
+
} else if (stale > PID_TRUST_WINDOW_MS) {
|
|
472
|
+
// A real neohive agent writes heartbeat every 10s. If 60s have passed
|
|
473
|
+
// without one, the PID belongs to a different process (OS recycled it).
|
|
486
474
|
alive = false;
|
|
475
|
+
} else {
|
|
476
|
+
// Within trust window — verify PID as fallback
|
|
477
|
+
try { process.kill(pid, 0); alive = true; } catch { alive = false; }
|
|
487
478
|
}
|
|
479
|
+
} else {
|
|
480
|
+
try { process.kill(pid, 0); alive = true; } catch { alive = false; }
|
|
488
481
|
}
|
|
482
|
+
|
|
489
483
|
_pidAliveCache[cacheKey] = { alive, ts: Date.now() };
|
|
490
|
-
// Evict old entries (keep cache small)
|
|
491
484
|
const keys = Object.keys(_pidAliveCache);
|
|
492
485
|
if (keys.length > 200) {
|
|
493
486
|
const cutoff = Date.now() - SERVER_CONFIG.POLL_INTERVAL_MS * 5;
|