@evomap/evolver 1.87.4 → 1.88.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +848 -33
- package/package.json +1 -1
- package/scripts/build_binaries.js +11 -1
- package/src/adapters/hookAdapter.js +3 -1
- package/src/adapters/scripts/_runtimePaths.js +24 -0
- package/src/adapters/scripts/evolver-session-end.js +110 -78
- package/src/adapters/scripts/evolver-session-start.js +100 -0
- package/src/config.js +43 -8
- package/src/evolve/guards.js +1 -1
- package/src/evolve/pipeline/collect.js +1 -1
- package/src/evolve/pipeline/dispatch.js +1 -1
- package/src/evolve/pipeline/enrich.js +1 -1
- package/src/evolve/pipeline/hub.js +1 -1
- package/src/evolve/pipeline/select.js +1 -1
- package/src/evolve/pipeline/signals.js +1 -1
- package/src/evolve/utils.js +1 -1
- package/src/evolve.js +1 -1
- package/src/forceUpdate.js +42 -21
- package/src/gep/a2aProtocol.js +1 -1
- package/src/gep/assetStore.js +40 -0
- package/src/gep/candidateEval.js +1 -1
- package/src/gep/candidates.js +1 -1
- package/src/gep/contentHash.js +1 -1
- package/src/gep/crypto.js +1 -1
- package/src/gep/curriculum.js +1 -1
- package/src/gep/deviceId.js +1 -1
- package/src/gep/envFingerprint.js +1 -1
- package/src/gep/epigenetics.js +1 -1
- package/src/gep/explore.js +1 -1
- package/src/gep/featureFlags.js +4 -0
- package/src/gep/gitOps.js +7 -2
- package/src/gep/hash.js +1 -1
- package/src/gep/hubFetch.js +1 -1
- package/src/gep/hubReview.js +1 -1
- package/src/gep/hubSearch.js +1 -1
- package/src/gep/hubVerify.js +1 -1
- package/src/gep/idleScheduler.js +78 -0
- package/src/gep/learningSignals.js +1 -1
- package/src/gep/mailboxTransport.js +34 -0
- package/src/gep/memoryGraph.js +1 -1
- package/src/gep/memoryGraphAdapter.js +1 -1
- package/src/gep/mutation.js +1 -1
- package/src/gep/narrativeMemory.js +1 -1
- package/src/gep/openPRRegistry.js +1 -1
- package/src/gep/paths.js +16 -2
- package/src/gep/personality.js +1 -1
- package/src/gep/policyCheck.js +1 -1
- package/src/gep/prompt.js +1 -1
- package/src/gep/recallVerifier.js +1 -1
- package/src/gep/reflection.js +1 -1
- package/src/gep/selector.js +1 -1
- package/src/gep/skillDistiller.js +1 -1
- package/src/gep/solidify.js +1 -1
- package/src/gep/strategy.js +1 -1
- package/src/gep/validator/index.js +46 -1
- package/src/gep/validator/sandboxExecutor.js +10 -1
- package/src/gep/validator/stakeBootstrap.js +3 -0
- package/src/gep/workspaceKeychain.js +1 -1
- package/src/ops/lifecycle.js +79 -10
- package/src/ops/skills_monitor.js +2 -1
- package/src/proxy/index.js +7 -1
- package/src/proxy/lifecycle/manager.js +77 -4
- package/src/proxy/mailbox/store.js +52 -2
- package/src/proxy/server/settings.js +16 -2
- package/src/proxy/sync/inbound.js +14 -1
package/index.js
CHANGED
|
@@ -42,14 +42,94 @@ try {
|
|
|
42
42
|
const evolve = require('./src/evolve');
|
|
43
43
|
const { solidify } = require('./src/gep/solidify');
|
|
44
44
|
const path = require('path');
|
|
45
|
+
const os = require('os');
|
|
45
46
|
const { getRepoRoot } = require('./src/gep/paths');
|
|
46
47
|
const fs = require('fs');
|
|
47
48
|
const { spawn } = require('child_process');
|
|
48
49
|
|
|
50
|
+
// Interruptible sleep: SIGCONT (and any future wake hook) can short-circuit
|
|
51
|
+
// pending sleeps so a daemon that just woke from macOS sleep doesn't sit
|
|
52
|
+
// out the rest of its pre-sleep adaptive-sleep window on the resumed
|
|
53
|
+
// monotonic clock. Without this, the heartbeat side recovers via the
|
|
54
|
+
// drift detector but the outer evolve cycle stays paused up to maxSleepMs
|
|
55
|
+
// (default 5 min) after wake. Each call tracks its own resolver in
|
|
56
|
+
// _activeSleeps so the wake hook can resolve all of them.
|
|
57
|
+
// Wake callbacks of every sleepMs() call that has not resolved yet. A wake
// hook can run them all to cut pending sleeps short.
const _activeSleeps = new Set();

/**
 * Sleep for roughly `ms` milliseconds, resolving early if the sleep is
 * interrupted through its entry in `_activeSleeps`.
 *
 * @param {number|string} ms - Requested delay. It is parsed as a base-10
 *   integer; negative or unparsable values are treated as 0.
 * @returns {Promise<void>} Resolves once, on timeout or interruption.
 */
function sleepMs(ms) {
  const parsed = parseInt(String(ms), 10);
  const delay = Number.isFinite(parsed) ? Math.max(0, parsed) : 0;

  return new Promise((resolve) => {
    let settled = false;
    let handle = null;

    // Shared by the timer and by interrupters; only the first call has any
    // effect, so a late timer tick or a double wake is harmless.
    function wake() {
      if (settled) return;
      settled = true;
      clearTimeout(handle);
      _activeSleeps.delete(wake);
      resolve();
    }

    // NOTE: the timer is deliberately left ref'd (no handle.unref()). During
    // a long adaptive sleep it is often the only ref'd handle, because the
    // daemon's other timers are unref'd. If this one were unref'd too,
    // Node.js could find nothing keeping the event loop alive and exit the
    // process silently in the middle of a sleep.
    handle = setTimeout(wake, delay);
    _activeSleeps.add(wake);
  });
}
|
|
84
|
+
/**
 * Resolve every pending sleepMs() promise immediately.
 *
 * Each wake callback removes itself from `_activeSleeps` when it runs, so the
 * set is copied before iterating. A callback that throws is ignored so the
 * remaining sleeps are still woken.
 */
function _interruptAllSleeps() {
  if (_activeSleeps.size === 0) return;
  [..._activeSleeps].forEach((wake) => {
    try {
      wake();
    } catch (_) {}
  });
}
|
|
92
|
+
|
|
93
|
+
// Round-6 (§19.5): heartbeat-internal wake recovery (drainPool +
|
|
94
|
+
// pokeHeartbeat + SSE restart + self-driving-poll re-arm) lives in
|
|
95
|
+
// a2aProtocol so the drift detector can drive it directly. Process-
|
|
96
|
+
// level wake hooks (sleepMs interrupter, validator daemon poke) are
|
|
97
|
+
// registered with a2aProtocol so both the SIGCONT handler and the
|
|
98
|
+
// drift detector long-sleep branch run them. Lazy-register so requires
|
|
99
|
+
// resolve cleanly under test (single Set of registered hooks; cheap to
|
|
100
|
+
// re-register idempotently).
|
|
101
|
+
// Set once all process-level wake hooks have been handed to a2aProtocol.
let _wakeHooksRegistered = false;

/**
 * Register the process-level wake hooks with a2aProtocol, at most once.
 *
 * Three hooks are registered, in this order:
 *   1. interrupt every pending sleepMs() call;
 *   2. interrupt guard sleeps, when ./src/evolve/guards exposes
 *      `_interruptGuardSleeps` (guards keeps its own private sleep helper, so
 *      it would otherwise sit out its full backoff after a wake);
 *   3. poke the validator daemon, when ./src/gep/validator exposes
 *      `pokeValidatorDaemon`.
 *
 * Everything is best-effort. If a2aProtocol cannot be loaded, lacks
 * `registerWakeHook`, or throws during registration, the flag stays false
 * and a later call tries again. Each hook swallows its own errors.
 */
function _registerProcessWakeHooks() {
  if (_wakeHooksRegistered) return;
  try {
    const protocol = require('./src/gep/a2aProtocol.js');
    if (typeof protocol.registerWakeHook !== 'function') return;

    const hooks = [
      () => {
        try {
          _interruptAllSleeps();
        } catch (_) {}
      },
      () => {
        try {
          const guards = require('./src/evolve/guards');
          if (guards && typeof guards._interruptGuardSleeps === 'function') {
            guards._interruptGuardSleeps();
          }
        } catch (_) {}
      },
      () => {
        try {
          const validator = require('./src/gep/validator');
          if (validator && typeof validator.pokeValidatorDaemon === 'function') {
            validator.pokeValidatorDaemon();
          }
        } catch (_) {}
      },
    ];

    for (const hook of hooks) {
      protocol.registerWakeHook(hook);
    }
    _wakeHooksRegistered = true;
  } catch (_) {}
}
|
|
54
134
|
|
|
55
135
|
function readJsonSafe(p) {
|
|
@@ -186,35 +266,256 @@ function getLastSignals(statePath) {
|
|
|
186
266
|
}
|
|
187
267
|
}
|
|
188
268
|
|
|
189
|
-
// Singleton Guard - prevent multiple evolver daemon instances
|
|
269
|
+
// Singleton Guard - prevent multiple evolver daemon instances.
|
|
270
|
+
//
|
|
271
|
+
// Round-4: pidfile location previously defaulted to __dirname, which is a
|
|
272
|
+
// DIFFERENT path per install mode -- /usr/local/lib/node_modules/... for a
|
|
273
|
+
// global install, the dev-clone path for `node index.js`, a transient
|
|
274
|
+
// $NPM_CACHE/_npx/<hash> for `npx evolver`. Two daemons launched under
|
|
275
|
+
// different install modes never saw each other's lock and could run
|
|
276
|
+
// concurrently against the same ~/.evomap/node_secret, ping-ponging on
|
|
277
|
+
// secret rotation and silently entering reauth backoff -- the user-
|
|
278
|
+
// reported "first launch ok, idle, then dead forever" pattern. Default
|
|
279
|
+
// now lives under the per-user state dir so all install modes converge.
|
|
280
|
+
// EVOLVER_LOCK_DIR still overrides for tests / sandboxed runs.
|
|
190
281
|
/**
 * Resolve the path of the daemon singleton lock file.
 *
 * @returns {string} `<EVOLVER_LOCK_DIR>/evolver.pid` when the
 *   EVOLVER_LOCK_DIR environment variable is non-empty; otherwise
 *   `<home>/.evomap/instance.lock`, so every install mode shares one lock.
 */
function getLockFilePath() {
  const overrideDir = process.env.EVOLVER_LOCK_DIR;
  // os.homedir() is used rather than process.env.HOME, which is unset on
  // Windows.
  return overrideDir
    ? path.join(overrideDir, 'evolver.pid')
    : path.join(os.homedir(), '.evomap', 'instance.lock');
}
|
|
288
|
+
|
|
289
|
+
/**
 * Install `payload` at `lockFile` only if no file exists there yet.
 *
 * Why link(2) rather than tmp + rename: rename makes the WRITE atomic but not
 * the OWNERSHIP claim. Successive renames overwrite each other, so two
 * racing processes can each read back the other's PID (both give up) or
 * their own PID (both think they won). Hard-linking a private tmp file to the
 * target fails atomically with EEXIST when the target exists, so exactly one
 * of N concurrent callers succeeds.
 *
 * The caller must already have moved any stale lock out of the way.
 *
 * NOTE(windows): modes 0o700 / 0o600 are silently ignored on Windows, so
 * isolation there relies on the user-profile directory ACLs alone.
 *
 * @param {string} lockFile - Final lock path.
 * @param {string} payload - Text to store in the lock file.
 * @throws {Error} With code 'EEXIST' when another process already installed
 *   a lock; any other filesystem error is propagated unchanged.
 */
function _writeLockAtomic(lockFile, payload) {
  const dir = path.dirname(lockFile);
  try { fs.mkdirSync(dir, { recursive: true, mode: 0o700 }); } catch (_) {}

  const tmp = lockFile + '.' + process.pid + '.tmp';
  fs.writeFileSync(tmp, payload, { encoding: 'utf8', mode: 0o600 });

  try {
    try {
      fs.linkSync(tmp, lockFile);
    } catch (err) {
      const code = err && err.code;
      // Hard links are unavailable: EXDEV (different volumes), or
      // EPERM / ENOTSUP / EOPNOTSUPP / ENOSYS from filesystems without
      // hard-link support. fs.renameSync is NOT a usable fallback: it does
      // not copy across devices (it fails with EXDEV as well), and where it
      // does succeed it silently replaces a lock another process has just
      // installed. An exclusive create keeps the "fail with EEXIST if someone
      // beat us" guarantee; the only thing lost is that the content is
      // written after the file appears instead of atomically with it.
      const linkUnavailable =
        code === 'EXDEV' ||
        code === 'EPERM' ||
        code === 'ENOTSUP' ||
        code === 'EOPNOTSUPP' ||
        code === 'ENOSYS';
      if (!linkUnavailable) throw err;
      fs.writeFileSync(lockFile, payload, { encoding: 'utf8', flag: 'wx', mode: 0o600 });
    }
  } finally {
    // The tmp file is only a staging name; remove it on success and failure.
    try { fs.unlinkSync(tmp); } catch (_) {}
  }
}
|
|
344
|
+
|
|
345
|
+
/**
 * Read and decode the lock file.
 *
 * Two formats are accepted: a JSON object (content starting with `{`) and
 * the legacy format holding only the pid as text.
 *
 * @param {string} lockFile - Path of the lock file.
 * @returns {object|null} The parsed JSON payload, `{ pid }` for a legacy
 *   file with a positive pid, or null when the file is unreadable, empty or
 *   malformed.
 */
function _readLockPayload(lockFile) {
  let text;
  try {
    text = fs.readFileSync(lockFile, 'utf8').trim();
  } catch (_) {
    return null;
  }
  if (text === '') return null;

  if (text.startsWith('{')) {
    try {
      return JSON.parse(text);
    } catch (_) {
      return null;
    }
  }

  // Legacy lock files contain just the pid.
  const legacyPid = parseInt(text, 10);
  if (Number.isFinite(legacyPid) && legacyPid > 0) {
    return { pid: legacyPid };
  }
  return null;
}
|
|
358
|
+
|
|
359
|
+
/**
 * Build the JSON text stored in the lock file for this process.
 *
 * @returns {string} JSON with `pid`, `uid` (null where process.getuid is
 *   unavailable), `startedAt` (ISO timestamp) and `lease: true`.
 */
function _lockPayload() {
  const uid = typeof process.getuid === 'function' ? process.getuid() : null;
  const record = {
    pid: process.pid,
    uid,
    startedAt: new Date().toISOString(),
    // `lease` marks a lock whose owner refreshes its mtime on a lease (see
    // startLockRefresh). Only locks carrying this flag are reclaimed on
    // mtime staleness. A lock written by an older daemon (no flag) keeps the
    // kill(0)-only check, so a new binary cannot steal the lock of a
    // still-running old daemon.
    lease: true,
  };
  return JSON.stringify(record);
}
|
|
374
|
+
|
|
375
|
+
// Round-9: lease tunables for the daemon lock. A live daemon refreshes the
|
|
376
|
+
// lock mtime every LOCK_REFRESH_MS; a lock whose mtime is older than
|
|
377
|
+
// STALE_LOCK_TTL_MS (and that was written by a lease-aware daemon) is
|
|
378
|
+
// treated as stale even if its PID happens to be alive -- closing the
|
|
379
|
+
// "crash + PID reuse -> new daemon silently refuses to start" hole and the
|
|
380
|
+
// "SIGKILL leaves a stale lock nobody reclaims" hole. The TTL is well above
|
|
381
|
+
// the lock refresh cadence (LOCK_REFRESH_MS) so a healthy daemon never trips it.
|
|
382
|
+
// On Windows, SIGTERM is implemented as TerminateProcess() (not a catchable
|
|
383
|
+
// signal), so the shutdown() handler that calls releaseLock() never runs.
|
|
384
|
+
// The lock file stays on disk with the dead PID. Reduce the TTL on Windows
|
|
385
|
+
// so a subsequent start doesn't wait 15 minutes to reclaim the stale lock.
|
|
386
|
+
// Unix dropped from 15 min -> 5 min so a wedged daemon does not block takeover
|
|
387
|
+
// for a quarter hour. 5 min is still 2.5x the 2-min Unix refresh cadence.
|
|
388
|
+
// Windows 3 min TTL gets a 1-min refresh (3x margin) since 2-min refresh left
|
|
389
|
+
// only 1.5x margin against transient FS hiccups.
|
|
390
|
+
// Lease timings in milliseconds: a lease-aware lock older than
// STALE_LOCK_TTL_MS may be reclaimed; a live daemon touches its lock every
// LOCK_REFRESH_MS. Windows uses 3 min / 1 min, other platforms 5 min / 2 min.
const STALE_LOCK_TTL_MS = (process.platform === 'win32' ? 3 : 5) * 60_000;
const LOCK_REFRESH_MS = (process.platform === 'win32' ? 1 : 2) * 60_000;
// Handle of the interval that refreshes the lock mtime; null while stopped.
let _lockRefreshTimer = null;

/**
 * Decide whether a lock can be reclaimed because its lease has expired.
 *
 * @param {string} lockFile - Path of the lock file whose mtime is checked.
 * @param {object|null} payload - Decoded lock payload.
 * @returns {boolean} True only when the payload has `lease === true` AND the
 *   file's mtime is older than STALE_LOCK_TTL_MS. False for locks without
 *   the lease flag and when the file cannot be stat'ed.
 */
function _lockIsStaleByLease(lockFile, payload) {
  const leaseAware = Boolean(payload) && payload.lease === true;
  if (!leaseAware) return false;

  const now = Date.now();
  let modifiedAt;
  try {
    modifiedAt = fs.statSync(lockFile).mtimeMs;
  } catch (_) {
    return false;
  }
  return now - modifiedAt > STALE_LOCK_TTL_MS;
}
|
|
406
|
+
|
|
407
|
+
// Start refreshing the lock file's mtime so other processes can tell this
|
|
408
|
+
// daemon is alive without trusting a (recyclable) PID. unref'd: it never
|
|
409
|
+
// keeps the event loop open on its own, but fires for as long as the daemon
|
|
410
|
+
// is otherwise alive.
|
|
411
|
+
/**
 * Start the periodic mtime refresh of the lock file (no-op when already
 * running). The lock path is resolved once, when the refresh starts.
 *
 * The interval is unref'd, so it never keeps the event loop open by itself;
 * it fires only while something else keeps the process alive.
 */
function startLockRefresh() {
  if (_lockRefreshTimer) return;

  const target = getLockFilePath();
  const touch = () => {
    try {
      const stamp = new Date();
      fs.utimesSync(target, stamp, stamp);
    } catch (_) {
      /* lock gone / FS error: nothing we can do here */
    }
  };

  _lockRefreshTimer = setInterval(touch, LOCK_REFRESH_MS);
  const canUnref = _lockRefreshTimer && typeof _lockRefreshTimer.unref === 'function';
  if (canUnref) {
    _lockRefreshTimer.unref();
  }
}
|
|
424
|
+
|
|
425
|
+
/**
 * Stop the periodic lock refresh, if one is running, and clear its handle
 * so startLockRefresh() can start a new one later.
 */
function stopLockRefresh() {
  if (!_lockRefreshTimer) return;
  clearInterval(_lockRefreshTimer);
  _lockRefreshTimer = null;
}
|
|
431
|
+
|
|
196
432
|
function acquireLock() {
|
|
197
433
|
const lockFile = getLockFilePath();
|
|
434
|
+
// NOTE(windows): mode 0o700 / 0o600 are silently ignored on Windows.
|
|
435
|
+
// Lock directory and file permissions provide no OS-level isolation on
|
|
436
|
+
// Windows; rely on user-profile directory ACLs (%USERPROFILE%\.evomap).
|
|
198
437
|
try {
|
|
438
|
+
try { fs.mkdirSync(path.dirname(lockFile), { recursive: true, mode: 0o700 }); } catch (_) {}
|
|
199
439
|
try {
|
|
200
|
-
fs.writeFileSync(lockFile,
|
|
440
|
+
fs.writeFileSync(lockFile, _lockPayload(), { flag: 'wx', mode: 0o600 });
|
|
201
441
|
return true;
|
|
202
442
|
} catch (exclErr) {
|
|
203
443
|
if (exclErr.code !== 'EEXIST') throw exclErr;
|
|
204
444
|
}
|
|
205
|
-
const
|
|
206
|
-
if (!Number.isFinite(pid) || pid <= 0) {
|
|
207
|
-
console.log('[Singleton] Corrupt lock file
|
|
445
|
+
const payload = _readLockPayload(lockFile);
|
|
446
|
+
if (!payload || !Number.isFinite(payload.pid) || payload.pid <= 0) {
|
|
447
|
+
console.log('[Singleton] Corrupt lock file. Taking over.');
|
|
448
|
+
} else if (_lockIsStaleByLease(lockFile, payload)) {
|
|
449
|
+
// Round-9: a lease-aware daemon has not refreshed this lock's mtime
|
|
450
|
+
// within the stale TTL. Either it was SIGKILLed/crashed, or its PID
|
|
451
|
+
// has since been reused by an unrelated process (kill(0) below would
|
|
452
|
+
// then falsely report it alive and we would refuse to start forever).
|
|
453
|
+
// The expired lease is authoritative: take over.
|
|
454
|
+
console.log('[Singleton] Lock lease expired (PID ' + payload.pid + ', no mtime refresh for > ' +
|
|
455
|
+
Math.round(STALE_LOCK_TTL_MS / 60_000) + 'min). Taking over.');
|
|
208
456
|
} else {
|
|
209
457
|
try {
|
|
210
|
-
process.kill(pid, 0);
|
|
211
|
-
|
|
458
|
+
process.kill(payload.pid, 0);
|
|
459
|
+
// Process exists. Distinguish "alive, our user" (refuse) from
|
|
460
|
+
// "alive, different uid" (also refuse -- never barge into a root
|
|
461
|
+
// daemon under a user-launched evolver, etc.).
|
|
462
|
+
console.log(`[Singleton] Evolver loop already running (PID ${payload.pid}). Exiting.`);
|
|
212
463
|
return false;
|
|
213
464
|
} catch (e) {
|
|
214
|
-
|
|
465
|
+
if (e && e.code === 'EPERM') {
|
|
466
|
+
// PID exists but belongs to another user. Conservatively
|
|
467
|
+
// refuse: barging in would race the existing daemon for
|
|
468
|
+
// secret/heartbeat ownership.
|
|
469
|
+
console.warn(`[Singleton] Lock owned by PID ${payload.pid} (different user). Refusing to take over. ` +
|
|
470
|
+
`Remove ${lockFile} manually if the PID is actually dead.`);
|
|
471
|
+
return false;
|
|
472
|
+
}
|
|
473
|
+
console.log(`[Singleton] Stale lock found (PID ${payload.pid}). Taking over.`);
|
|
215
474
|
}
|
|
216
475
|
}
|
|
217
|
-
|
|
476
|
+
// Atomic takeover so two daemons that both observe the same stale PID
|
|
477
|
+
// and pass the kill(0) check cannot both end up "owning" the lock.
|
|
478
|
+
//
|
|
479
|
+
// Bug it fixes: the previous "unconditional unlinkSync then linkSync"
|
|
480
|
+
// pattern was NOT atomic across acquirers. Interleaving where P1 wins
|
|
481
|
+
// the linkSync but P2's unlinkSync then deletes P1's freshly-linked
|
|
482
|
+
// file (P2 never re-verifies it's deleting the same stale lock it
|
|
483
|
+
// observed) lets P2's subsequent linkSync also succeed. Both processes
|
|
484
|
+
// then return true and start a daemon, racing each other on the
|
|
485
|
+
// shared singleton secret store.
|
|
486
|
+
//
|
|
487
|
+
// renameSync is atomic at the filesystem level: only one of N racing
|
|
488
|
+
// acquirers can move the stale lockFile to a unique claim name, the
|
|
489
|
+
// rest see ENOENT and abort. After the claim succeeds, _writeLockAtomic
|
|
490
|
+
// installs the fresh lock; the claim file is unlinked in every exit
|
|
491
|
+
// path so it doesn't accumulate.
|
|
492
|
+
const claimFile = lockFile + '.' + process.pid + '.' + Date.now() + '.takeover';
|
|
493
|
+
try {
|
|
494
|
+
fs.renameSync(lockFile, claimFile);
|
|
495
|
+
} catch (e) {
|
|
496
|
+
if (e && e.code === 'ENOENT') {
|
|
497
|
+
// Another concurrent acquirer already claimed the stale lock.
|
|
498
|
+
// They'll race us on _writeLockAtomic below; the EEXIST branch
|
|
499
|
+
// handles the loser case correctly.
|
|
500
|
+
} else {
|
|
501
|
+
console.warn('[Singleton] Cannot claim stale lock at ' + lockFile + ': ' + e.message);
|
|
502
|
+
return false;
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
try {
|
|
506
|
+
_writeLockAtomic(lockFile, _lockPayload());
|
|
507
|
+
} catch (linkErr) {
|
|
508
|
+
try { fs.unlinkSync(claimFile); } catch (_) {}
|
|
509
|
+
if (linkErr && linkErr.code === 'EEXIST') {
|
|
510
|
+
// Lost the link race to another concurrent acquirer. Read who
|
|
511
|
+
// won (best-effort) for the log line.
|
|
512
|
+
const winner = _readLockPayload(lockFile);
|
|
513
|
+
console.log('[Singleton] Lost takeover race to PID ' + (winner && winner.pid) + '. Exiting.');
|
|
514
|
+
return false;
|
|
515
|
+
}
|
|
516
|
+
throw linkErr;
|
|
517
|
+
}
|
|
518
|
+
try { fs.unlinkSync(claimFile); } catch (_) {}
|
|
218
519
|
return true;
|
|
219
520
|
} catch (err) {
|
|
220
521
|
console.error('[Singleton] Lock acquisition failed:', err);
|
|
@@ -226,12 +527,73 @@ function releaseLock() {
|
|
|
226
527
|
const lockFile = getLockFilePath();
|
|
227
528
|
try {
|
|
228
529
|
if (fs.existsSync(lockFile)) {
|
|
229
|
-
|
|
230
|
-
|
|
530
|
+
const payload = _readLockPayload(lockFile);
|
|
531
|
+
if (payload && payload.pid === process.pid) fs.unlinkSync(lockFile);
|
|
231
532
|
}
|
|
232
533
|
} catch (e) { /* ignore */ }
|
|
233
534
|
}
|
|
234
535
|
|
|
536
|
+
// Round-7 (§20.7): the daemon-lock acquireLock() only fires for `--loop`
|
|
537
|
+
// mode; CLI subcommands like `evolver fetch` and `evolver sync` run
|
|
538
|
+
// without acquiring the lock and freely call sendHelloToHub when
|
|
539
|
+
// node_secret is missing. The hub-side hello-with-rotate rewrites the
|
|
540
|
+
// node_secret on disk, so two writers (the daemon's heartbeat path
|
|
541
|
+
// rotating one secret + this CLI's sendHelloToHub writing a different
|
|
542
|
+
// one) race to be "last writer." Whichever wrote second silences the
|
|
543
|
+
// other -- the daemon then 401-loops -> enters reauth backoff -> goes
|
|
544
|
+
// silent for 30 min..4 h. The original §6 "instance lock" scenario.
|
|
545
|
+
//
|
|
546
|
+
// This helper does NOT take the lock (the daemon legitimately owns it);
|
|
547
|
+
// it only refuses to proceed if a LIVE daemon owns the lock AND we are
|
|
548
|
+
// about to send a fresh hello. If the daemon is alive it already has a
|
|
549
|
+
// valid secret in ~/.evomap/node_secret, so the right thing for the CLI
|
|
550
|
+
// is to wait briefly for the secret to appear (newly registered daemon)
|
|
551
|
+
// or exit with an actionable error.
|
|
552
|
+
//
|
|
553
|
+
// Callers: every CLI subcommand whose runner could call sendHelloToHub()
|
|
554
|
+
// when getHubNodeSecret() returns empty. Currently: fetch, sync
|
|
555
|
+
// (round-7 §20.7), plus atp-complete, buy, orders, verify (round-8
|
|
556
|
+
// §21.8 -- the ATP runners hit the same vector via consumerAgent /
|
|
557
|
+
// merchantAgent / atpExecute paths).
|
|
558
|
+
/**
 * Abort a CLI subcommand (exit code 1) when a live evolver daemon owns the
 * singleton lock, so the CLI does not send a second hello that would rotate
 * node_secret against the daemon.
 *
 * This helper never takes or modifies the lock. It returns normally when:
 *   - no lock file exists, or its payload is missing/invalid;
 *   - the lock belongs to this very process;
 *   - the lock is lease-aware and its lease has expired (daemon dead or PID
 *     reused);
 *   - the recorded PID does not exist (ESRCH).
 * Any other outcome of the liveness probe, including EPERM (process exists
 * under a different user), counts as "daemon running".
 *
 * If the check itself throws (for example a filesystem permission error),
 * the error is swallowed and the caller proceeds: failing closed would block
 * legitimate CLI use for the sake of a rare race.
 *
 * @param {string} toolLabel - Name of the calling subcommand, used as the
 *   prefix of the error message.
 */
function refuseHelloIfDaemonRunning(toolLabel) {
  try {
    const lockFile = getLockFilePath();
    if (!fs.existsSync(lockFile)) return; // no daemon
    const payload = _readLockPayload(lockFile);
    if (!payload || !Number.isFinite(payload.pid) || payload.pid <= 0) return;
    if (payload.pid === process.pid) return; // shouldn't happen for CLI
    // A lease-aware lock with a stale mtime is not proof of a live daemon,
    // even if the recorded PID currently resolves to some process.
    if (_lockIsStaleByLease(lockFile, payload)) return;
    try {
      process.kill(payload.pid, 0); // signal 0: existence probe only
    } catch (e) {
      if (e && e.code === 'ESRCH') return; // stale lock, daemon is gone
      // Anything else (e.g. EPERM) means the process exists: refuse below.
    }
    // Report the path that was actually checked: it is not
    // ~/.evomap/instance.lock when EVOLVER_LOCK_DIR overrides the location.
    console.error(
      '[' + toolLabel + '] Refusing to send hello: an evolver daemon ' +
      '(PID ' + payload.pid + ') is running and owns ' + lockFile + '.'
    );
    console.error(
      '  Two concurrent hello calls would rotate node_secret against ' +
      'each other and silence the daemon for hours.'
    );
    console.error(
      '  Either wait for the daemon to register (the secret will ' +
      'appear at ~/.evomap/node_secret), or stop the daemon and retry.'
    );
    process.exit(1);
  } catch (_) {
    // Deliberately fail open; see the function description.
  }
}
|
|
596
|
+
|
|
235
597
|
async function main() {
|
|
236
598
|
const args = process.argv.slice(2);
|
|
237
599
|
const command = args[0];
|
|
@@ -242,13 +604,49 @@ async function main() {
|
|
|
242
604
|
|
|
243
605
|
if (!command || command === 'run' || command === '/evolve' || isLoop) {
|
|
244
606
|
if (isLoop) {
|
|
607
|
+
// EPIPE protection. The daemon may outlive the controlling
|
|
608
|
+
// terminal (user closes the iTerm tab, ssh session drops, parent
|
|
609
|
+
// shell exits). The SIGHUP handler below covers the signal side,
|
|
610
|
+
// but the underlying pty fd is gone and the FIRST subsequent
|
|
611
|
+
// console.log writes to a closed pipe -> stdout emits 'error'
|
|
612
|
+
// with EPIPE. Without a listener attached, Node escalates EPIPE
|
|
613
|
+
// to uncaughtException, which our handler then turns into
|
|
614
|
+
// process.exit(1). Net result: daemon silently dies the next
|
|
615
|
+
// time it tries to log, with no useful trace. Swallow EPIPE
|
|
616
|
+
// explicitly so the daemon stays alive when its terminal goes
|
|
617
|
+
// away (matching standard daemonization practice).
|
|
618
|
+
try {
|
|
619
|
+
// EPIPE: swallow (daemon must outlive its controlling terminal).
|
|
620
|
+
// Non-EPIPE (EIO, ENOSPC on redirected log, etc.): the listener
|
|
621
|
+
// already prevents 'error' from escalating to uncaughtException,
|
|
622
|
+
// so write a one-line trace to the *other* stream so operators
|
|
623
|
+
// can see the failure mode instead of finding a silent daemon.
|
|
624
|
+
process.stdout.on('error', function (err) {
|
|
625
|
+
if (err && err.code === 'EPIPE') return;
|
|
626
|
+
try { process.stderr.write('[evolver] stdout error: ' + (err && (err.code || err.message) || err) + '\n'); } catch (_) {}
|
|
627
|
+
});
|
|
628
|
+
process.stderr.on('error', function (err) {
|
|
629
|
+
if (err && err.code === 'EPIPE') return;
|
|
630
|
+
try { process.stdout.write('[evolver] stderr error: ' + (err && (err.code || err.message) || err) + '\n'); } catch (_) {}
|
|
631
|
+
});
|
|
632
|
+
} catch (_) {}
|
|
633
|
+
|
|
245
634
|
const originalLog = console.log;
|
|
246
635
|
const originalWarn = console.warn;
|
|
247
636
|
const originalError = console.error;
|
|
248
637
|
function ts() { return '[' + new Date().toISOString() + ']'; }
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
638
|
+
// Wrap originals in try/catch so a broken transport (closed pty,
|
|
639
|
+
// disk full on a redirected log file) cannot escape and trip
|
|
640
|
+
// uncaughtException -> exit(1) the next time we log.
|
|
641
|
+
console.log = (...args) => {
|
|
642
|
+
try { originalLog.call(console, ts(), ...args); } catch (_) {}
|
|
643
|
+
};
|
|
644
|
+
console.warn = (...args) => {
|
|
645
|
+
try { originalWarn.call(console, ts(), ...args); } catch (_) {}
|
|
646
|
+
};
|
|
647
|
+
console.error = (...args) => {
|
|
648
|
+
try { originalError.call(console, ts(), ...args); } catch (_) {}
|
|
649
|
+
};
|
|
252
650
|
}
|
|
253
651
|
|
|
254
652
|
console.log('Starting evolver...');
|
|
@@ -274,26 +672,371 @@ async function main() {
|
|
|
274
672
|
if (isLoop) {
|
|
275
673
|
// Internal daemon loop (no wrapper required).
|
|
276
674
|
if (!acquireLock()) process.exit(0);
|
|
675
|
+
// Round-9: refresh the lock lease so other processes can detect a
|
|
676
|
+
// crash / PID reuse via stale mtime instead of trusting kill(0).
|
|
677
|
+
startLockRefresh();
|
|
678
|
+
|
|
679
|
+
// Linux OOM score adjustment: lower oom_score_adj so the kernel
|
|
680
|
+
// deprioritises evolver when choosing an OOM victim. This is a
|
|
681
|
+
// best-effort hint -- the kernel can still kill us under extreme
|
|
682
|
+
// memory pressure, but we will not be the first target.
|
|
683
|
+
//
|
|
684
|
+
// Value -500 (range -1000..1000; -1000 = never kill, 0 = default,
|
|
685
|
+
// +1000 = kill first). -500 gives meaningful protection without
|
|
686
|
+
// reserving the slot for truly critical system services.
|
|
687
|
+
//
|
|
688
|
+
// Requires the process to be either root or to have CAP_SYS_RESOURCE.
|
|
689
|
+
// On most Docker/k8s images running as non-root this write will fail
|
|
690
|
+
// with EACCES -- that is expected and harmless; we log a one-liner so
|
|
691
|
+
// operators know to pass --oom-score-adj=-500 via their container spec,
|
|
692
|
+
// or to set /proc/<pid>/oom_score_adj from the supervising process.
|
|
693
|
+
//
|
|
694
|
+
// Users who want to set this from outside the process (safer, no CAP):
|
|
695
|
+
// echo -500 > /proc/$(pgrep -f "node.*evolver.*--loop")/oom_score_adj
|
|
696
|
+
//
|
|
697
|
+
// Opt-out: EVOLVER_DISABLE_OOM_ADJUST=1
|
|
698
|
+
if (process.platform === 'linux' &&
    String(process.env.EVOLVER_DISABLE_OOM_ADJUST || '') !== '1') {
  // Best-effort: ask the kernel to prefer other processes as OOM victims.
  // The write is attempted once at daemon start; any failure is reported
  // with a single log line and otherwise ignored.
  const _oomTarget = '-500';
  try {
    const fsOom = require('fs');
    fsOom.writeFileSync('/proc/self/oom_score_adj', _oomTarget + '\n', 'utf8');
    console.log('[evolver] Set Linux oom_score_adj=' + _oomTarget +
      ' to reduce OOM-kill priority.');
  } catch (oomErr) {
    // Permission errors (EACCES / EPERM) are the expected failure mode for
    // unprivileged processes; none of them are fatal.
    let oomCode = 'unknown';
    if (oomErr && oomErr.code) {
      oomCode = oomErr.code;
    }
    console.log([
      '[evolver] Could not set oom_score_adj (' + oomCode,
      '). To protect evolver from OOM kill, run as root, add ',
      'CAP_SYS_RESOURCE, or set oom_score_adj externally via your ',
      'container spec (e.g. resources.requests + oom_score_adj in k8s).',
    ].join(''));
  }
}
|
|
716
|
+
|
|
717
|
+
// Round-4: macOS App Nap / QoS demotion mitigation. Without this,
|
|
718
|
+
// a backgrounded `evolver --loop` running in an iTerm tab gets its
|
|
719
|
+
// process QoS demoted to UTILITY/BACKGROUND once the parent app
|
|
720
|
+
// is no longer focused. CPU runtime caps to ~5% of one core,
|
|
721
|
+
// setTimeout resolution drops toward 1 Hz, disk I/O is throttled.
|
|
722
|
+
// The drift detector cannot rescue this because the demotion does
|
|
723
|
+
// NOT cause Date.now() to jump -- only the inter-tick interval
|
|
724
|
+
// dilates, which the detector samples through its own (also
|
|
725
|
+
// demoted) setInterval. Net result: heartbeat appears alive but
|
|
726
|
+
// ticks fire so slowly that the hub marks the node offline,
|
|
727
|
+
// matching the user-reported "first launch ok -> idle -> dead
|
|
728
|
+
// forever" pattern.
|
|
729
|
+
//
|
|
730
|
+
// os.setPriority() raises BSD process priority; macOS bridges that
|
|
731
|
+
// to Mach thread QoS via the priority bridge so the demotion does
|
|
732
|
+
// not engage. -10 is the most negative value raisable without
|
|
733
|
+
// root. Failures are logged but non-fatal (e.g. EPERM under a
|
|
734
|
+
// restrictive sandbox -- the daemon continues, just unprotected).
|
|
735
|
+
// Opt-out via EVOLVER_DISABLE_PRIORITY_BOOST=1 for users on
|
|
736
|
+
// power-constrained battery profiles who would rather accept
|
|
737
|
+
// the throttle than the extra wake-time.
|
|
738
|
+
if (process.platform === 'darwin' &&
    String(process.env.EVOLVER_DISABLE_PRIORITY_BOOST || '') !== '1') {
  // Step 1: try to raise the process priority and verify that it landed.
  let priorityBoostOk = false;
  try {
    const os = require('os');
    os.setPriority(0, -10);
    // Read the value back instead of trusting the call: only an observed
    // priority of -10 or lower counts as a successful boost.
    const observed = os.getPriority();
    if (observed <= -10) {
      priorityBoostOk = true;
      console.log('[evolver] Raised process priority on macOS to ' + observed +
        ' to prevent App Nap / QoS demotion.');
    } else {
      console.warn('[evolver] setPriority(-10) reported success but observed priority is ' +
        observed + '; App Nap protection NOT in effect. ' +
        'Run with EVOLVER_CAFFEINATE=1 or via `caffeinate -is node index.js --loop`.');
    }
  } catch (e) {
    console.warn('[evolver] setPriority(-10) refused (' + (e && e.code || 'unknown') +
      '): ' + (e && e.message || e) + '. App Nap protection NOT in effect. ' +
      'Run with EVOLVER_CAFFEINATE=1 or via `caffeinate -is node index.js --loop`.');
  }

  // Step 2: caffeinate side-child. Runs when the user opted in
  // (EVOLVER_CAFFEINATE=1/true), or as a fallback when the priority boost
  // did not land and the user has not opted out (EVOLVER_CAFFEINATE=0/false).
  const caffeinateRaw = String(process.env.EVOLVER_CAFFEINATE || '').toLowerCase().trim();
  const caffeinateOptedIn = caffeinateRaw === '1' || caffeinateRaw === 'true';
  const caffeinateOptedOut = caffeinateRaw === '0' || caffeinateRaw === 'false';
  const caffeinateFallback = !priorityBoostOk && !caffeinateOptedOut;
  if (caffeinateOptedIn || caffeinateFallback) {
    // Shared warning for both the synchronous and the asynchronous
    // spawn-failure paths.
    const warnCaffeinateFailed = function (err) {
      console.warn('[evolver] caffeinate spawn failed: ' +
        (err && err.message || err) + '. App Nap may throttle the heartbeat. ' +
        'Install caffeinate (Xcode CLT) or run under a launchd plist with NSAppSleepDisabled=1.');
    };
    try {
      const child = spawn('caffeinate', ['-i', '-w', String(process.pid)], {
        detached: true,
        stdio: 'ignore',
      });
      // spawn() reports a missing or unexecutable binary asynchronously via
      // the child's 'error' event rather than by throwing. Without a
      // listener that event becomes an uncaught exception, which this
      // daemon treats as fatal -- so the optional fallback would take the
      // whole process down. Handle it as the non-fatal condition it is.
      child.on('error', warnCaffeinateFailed);
      child.unref();
      // child.pid is undefined when the process could not be started; in
      // that case the 'error' listener above does the reporting.
      if (child.pid) {
        console.log('[evolver] Spawned caffeinate -i -w ' + process.pid +
          ' to block App Nap (pid ' + child.pid + ').' +
          (caffeinateFallback ? ' (fallback because priority boost was refused)' : ''));
      }
    } catch (e) {
      // Synchronous failures (e.g. invalid arguments) still land here.
      warnCaffeinateFailed(e);
    }
  }
}
|
|
797
|
+
|
|
798
|
+
// Event-loop keep-alive anchor (defense-in-depth for the sleepMs fix).
|
|
799
|
+
//
|
|
800
|
+
// All timers in a2aProtocol.js (heartbeat, drift detector, self-driving
|
|
801
|
+
// poll, SSE reconnect) are unref'd so they never prevent a clean exit.
|
|
802
|
+
// The sleepMs() timer above is now ref'd (the primary fix), but as an
|
|
803
|
+
// additional safety net we install one ref'd setInterval here that fires
|
|
804
|
+
// every 10 minutes. Its only job is to emit a lightweight log line so
|
|
805
|
+
// the evolver_loop.log gets touched even when the daemon is completely
|
|
806
|
+
// idle (no session signals, evolve cycle sleeping at maxSleepMs). This
|
|
807
|
+
// guarantees the event loop has at least one ref'd handle at all times
|
|
808
|
+
// while the daemon is running, and provides a heartbeat-on-disk so
|
|
809
|
+
// lifecycle.checkHealth() (MAX_SILENCE_MS = 30 min default) does not
|
|
810
|
+
// wrongly declare the process stagnant during legitimate long idle windows.
|
|
811
|
+
// Cleared in shutdown() so it does not outlive the daemon.
|
|
812
|
+
const _KEEPALIVE_INTERVAL_MS = 10 * 60 * 1000;
// Ref'd interval that appends one `keepalive_tick` JSON line to the evolver
// log on every firing. Every failure inside the callback is swallowed so the
// timer itself can never be taken down by a bad log destination.
let _keepAliveTimer = setInterval(function () {
  try {
    const a2aKA = require('./src/gep/a2aProtocol');
    if (typeof a2aKA.getHeartbeatStats !== 'function') {
      return; // nothing to report without the stats accessor
    }
    const s = a2aKA.getHeartbeatStats();
    const { getEvolverLogPath } = require('./src/gep/paths');
    const fsKA = require('fs');
    const pathKA = require('path');

    const logPath = getEvolverLogPath();
    const logDir = pathKA.dirname(logPath);
    fsKA.mkdirSync(logDir, { recursive: true });

    const record = {
      ts: new Date().toISOString(),
      type: 'keepalive_tick',
      hb_running: s.running,
      hb_last_tick_ago_s: null,
    };
    if (s.lastTickAt) {
      record.hb_last_tick_ago_s = Math.round((Date.now() - s.lastTickAt) / 1000);
    }
    const line = JSON.stringify(record) + '\n';

    // Inline append mirroring a2aProtocol._appendHeartbeatLog's ENOENT retry
    // (that helper is not exported): if the directory vanished between the
    // mkdir and the append, recreate it and try exactly once more.
    try {
      fsKA.appendFileSync(logPath, line, { encoding: 'utf8' });
    } catch (appendErr) {
      if (appendErr && appendErr.code === 'ENOENT') {
        try {
          fsKA.mkdirSync(logDir, { recursive: true });
          fsKA.appendFileSync(logPath, line, { encoding: 'utf8' });
        } catch (_) { /* log destination broken; do not throw out */ }
      }
    }
  } catch (_) { /* never let any error kill the keep-alive timer */ }
}, _KEEPALIVE_INTERVAL_MS);
|
|
846
|
+
// Intentionally ref'd: this is the explicit event-loop anchor.
|
|
847
|
+
// Do NOT add .unref() here -- that would defeat the purpose.
|
|
848
|
+
|
|
277
849
|
// Releases everything the daemon loop installed: the keep-alive interval,
// the lock lease refresh, the lock itself, and the a2aProtocol heartbeat
// and event stream. Registered for 'exit' and called from the SIGINT/SIGTERM
// handlers.
function shutdown() {
  if (_keepAliveTimer) {
    clearInterval(_keepAliveTimer);
    _keepAliveTimer = null;
  }
  stopLockRefresh();
  releaseLock();
  // Stop the heartbeat first, then the event stream. Each call is isolated
  // so a failure in one does not prevent the other from running.
  const stoppers = ['stopHeartbeat', 'stopEventStream'];
  for (let i = 0; i < stoppers.length; i++) {
    try { require('./src/gep/a2aProtocol')[stoppers[i]](); } catch (e) {}
  }
}
|
|
281
859
|
process.on('exit', shutdown);
|
|
282
860
|
process.on('SIGINT', () => { shutdown(); process.exit(); });
|
|
283
861
|
process.on('SIGTERM', () => { shutdown(); process.exit(); });
|
|
862
|
+
// SIGHUP: two meanings depending on platform and how the daemon was started.
|
|
863
|
+
//
|
|
864
|
+
// macOS / interactive terminal: closing the iTerm/Terminal tab sends
|
|
865
|
+
// SIGHUP to the controlling process, and Node's default action is to
|
|
866
|
+
// terminate. That is the most common "first-launch, then idle, then
|
|
867
|
+
// evolver dead" path on macOS. As a daemon we intentionally ignore it.
|
|
868
|
+
//
|
|
869
|
+
// Linux systemd: `systemctl reload evolver` delivers SIGHUP to signal
|
|
870
|
+
// configuration reload. The socket / connection state may be stale (e.g.
|
|
871
|
+
// the hub URL changed in .env, or the admin wants a fresh hello after a
|
|
872
|
+
// manual secret rotation). We treat reload as a soft wake-recovery: drain
|
|
873
|
+
// the undici pool, poke the heartbeat, and restart the SSE stream, which
|
|
874
|
+
// is identical to what SIGCONT / the drift detector do on system resume.
|
|
875
|
+
// We also emit sd_notify RELOADING=1 / READY=1 so systemd can track the
|
|
876
|
+
// reload state (required for Type=notify units that call systemctl reload).
|
|
877
|
+
//
|
|
878
|
+
// A one-shot (non --loop) invocation keeps the default behavior because
|
|
879
|
+
// this branch is gated on `isLoop`.
|
|
880
|
+
process.on('SIGHUP', () => {
  try {
    if (process.platform !== 'linux') {
      // macOS / non-systemd: terminal-close semantics, ignore the signal.
      console.warn('[evolver] Received SIGHUP (controlling terminal closed?). ' +
        'Daemon ignoring -- heartbeat loop continues. To stop the daemon use SIGINT/SIGTERM.');
      return;
    }

    // Sends one notification through a2aProtocol._sdNotify when that hook
    // exists. The payload is built lazily so nothing is computed when the
    // hook is missing; any failure is ignored.
    const notifyServiceManager = function (buildPayload) {
      try {
        const a2aForSd = require('./src/gep/a2aProtocol.js');
        if (typeof a2aForSd._sdNotify === 'function') {
          a2aForSd._sdNotify(buildPayload());
        }
      } catch (_) {}
    };

    // Linux: treat SIGHUP as a reload request. Announce RELOADING first,
    // run the recovery, then report READY again.
    notifyServiceManager(function () {
      // MONOTONIC_USEC is expressed in microseconds. process.hrtime()
      // returns [sec, nsec], which avoids BigInt literals.
      const hr = process.hrtime();
      const monotonicUsec = hr[0] * 1000000 + Math.floor(hr[1] / 1000);
      return 'RELOADING=1\nMONOTONIC_USEC=' + monotonicUsec;
    });

    console.warn('[evolver] Received SIGHUP on Linux (systemctl reload?). ' +
      'Running wake recovery (drain pool + poke heartbeat + restart SSE). ' +
      'To stop the daemon use SIGINT/SIGTERM.');

    try {
      const a2a = require('./src/gep/a2aProtocol.js');
      if (typeof a2a._runWakeRecovery === 'function') {
        a2a._runWakeRecovery();
      }
    } catch (_) {}

    // Cut any pending sleepMs short so the evolve loop resumes right after
    // the reload instead of waiting out its window.
    try { _interruptAllSleeps(); } catch (_) {}

    // Close the RELOADING window.
    notifyServiceManager(function () { return 'READY=1'; });
  } catch (_) {}
});
|
|
923
|
+
// SIGCONT fires on `kill -CONT`, debugger detach, and some VM/sleep
|
|
924
|
+
// resume paths. Nudge the heartbeat loop so it doesn't sit waiting for
|
|
925
|
+
// its next scheduled tick (which could be up to 30 min away under
|
|
926
|
+
// backoff) before reconnecting after a wake event. Also restart the
|
|
927
|
+
// SSE stream: the underlying TCP socket almost certainly died during
|
|
928
|
+
// the SIGSTOP window without a FIN reaching us, and the existing
|
|
929
|
+
// exponential reconnect could be up to 120s away on the resumed
|
|
930
|
+
// monotonic clock.
|
|
931
|
+
// Round-6 (§19.5): register process-level wake hooks so both the
|
|
932
|
+
// SIGCONT handler and the drift detector's long-sleep branch
|
|
933
|
+
// (a2aProtocol) interrupt the outer evolve sleepMs and poke the
|
|
934
|
+
// validator daemon, not just the heartbeat-internal recovery.
|
|
935
|
+
_registerProcessWakeHooks();
|
|
936
|
+
// SIGCONT is not supported on Windows (process.on() throws ERR_UNKNOWN_SIGNAL).
|
|
937
|
+
// Wake recovery on Windows is handled exclusively by the drift detector.
|
|
938
|
+
if (process.platform !== 'win32') {
  // Delegates to a2aProtocol._runWakeRecovery (when present) so this signal
  // shares one recovery path with the drift detector. Errors are ignored;
  // a failed recovery attempt must not take the daemon down.
  process.on('SIGCONT', function onSigcont() {
    try {
      const a2a = require('./src/gep/a2aProtocol.js');
      const canRecover = typeof a2a._runWakeRecovery === 'function';
      if (canRecover) {
        a2a._runWakeRecovery();
      }
    } catch (_) {}
  });
}
|
|
284
959
|
// Last-resort handler: log the failure, release the lock, and exit non-zero.
process.on('uncaughtException', function (err) {
  let detail;
  if (err && err.stack) {
    detail = err.stack;
  } else {
    detail = String(err);
  }
  console.error('[FATAL] Uncaught exception:', detail);
  releaseLock();
  process.exit(1);
});
|
|
289
964
|
// Sliding window: only exit if many rejections cluster in a short
|
|
290
|
-
// period
|
|
291
|
-
//
|
|
292
|
-
//
|
|
293
|
-
// process for noise. Cluster =
|
|
965
|
+
// period AND the daemon shows no other signs of life. A daemon
|
|
966
|
+
// running for weeks can accumulate harmless, unrelated rejections
|
|
967
|
+
// (transient network blips, hub timeouts); the original cumulative
|
|
968
|
+
// counter would eventually kill the process for noise. Cluster =
|
|
969
|
+
// real failure cascade. But macOS wake bursts also synthesize
|
|
970
|
+
// clusters: heartbeat / SSE / validator / merchantAgent / ATP all
|
|
971
|
+
// fire near-simultaneously on resume and any subsystem with an
|
|
972
|
+
// unhandled async-callback throw can blow past 5 rejections in
|
|
973
|
+
// seconds. We add a liveness gate so an actively-recovering
|
|
974
|
+
// daemon doesn't kill itself in the middle of a wake-recovery
|
|
975
|
+
// storm. Threshold and window widened to match the macOS-wake
|
|
976
|
+
// amplification observed in round-2 testing.
|
|
294
977
|
const REJECTION_WINDOW_MS = 5 * 60 * 1000;
|
|
295
|
-
const REJECTION_THRESHOLD =
|
|
978
|
+
const REJECTION_THRESHOLD = 10;
|
|
979
|
+
const RECENT_LIVENESS_MS = 60 * 1000;
|
|
296
980
|
let _rejectionTimestamps = [];
|
|
981
|
+
// Liveness gate for the unhandled-rejection trap.
//
// Returns true when the heartbeat appears to be making progress, judged via
// the public a2aProtocol.getHeartbeatStats() API:
//   - `running` is truthy,
//   - `lastTickAt` is set and younger than RECENT_LIVENESS_MS,
//   - `consecutiveFailures` is zero (or absent),
//   - `reauthBackoffUntil` is not in the future.
// A fresh tick alone is deliberately not enough: "alive" here means
// "succeeding", not merely "ticking".
//
// Fails open: if the module cannot be loaded, the accessor is missing, or
// anything throws while reading the stats, the function returns true so an
// unreadable state never turns a rejection cluster into an exit.
function _heartbeatLooksAlive() {
  try {
    const a2a = require('./src/gep/a2aProtocol.js');
    const statsReadable = Boolean(a2a) && typeof a2a.getHeartbeatStats === 'function';
    if (!statsReadable) {
      // Cannot read state -- fail open.
      return true;
    }

    const s = a2a.getHeartbeatStats();
    if (!s || !s.running) {
      return false;
    }

    // Tick freshness.
    const tickedAt = s.lastTickAt || 0;
    const tickIsFresh = tickedAt > 0 && (Date.now() - tickedAt) < RECENT_LIVENESS_MS;
    if (!tickIsFresh) {
      return false;
    }

    // Success state: any recorded failure streak disqualifies the loop.
    const failureStreak = s.consecutiveFailures || 0;
    if (failureStreak > 0) {
      return false;
    }

    // Still inside a reauth backoff window means no forward progress.
    const backoffUntil = s.reauthBackoffUntil || 0;
    const inBackoff = backoffUntil > Date.now();
    return !inBackoff;
  } catch (_) {
    // Module load (or stats read) threw -- fail open as well.
    return true;
  }
}
|
|
297
1040
|
process.on('unhandledRejection', (reason) => {
|
|
298
1041
|
const now = Date.now();
|
|
299
1042
|
_rejectionTimestamps.push(now);
|
|
@@ -302,7 +1045,15 @@ async function main() {
|
|
|
302
1045
|
});
|
|
303
1046
|
console.error('[FATAL] Unhandled promise rejection (' + _rejectionTimestamps.length + ' in window):', reason && reason.stack ? reason.stack : String(reason));
|
|
304
1047
|
if (_rejectionTimestamps.length >= REJECTION_THRESHOLD) {
|
|
305
|
-
|
|
1048
|
+
if (_heartbeatLooksAlive()) {
|
|
1049
|
+
console.warn('[FATAL] ' + _rejectionTimestamps.length + ' rejections within ' +
|
|
1050
|
+
(REJECTION_WINDOW_MS / 1000) + 's BUT heartbeat ticked in the last ' +
|
|
1051
|
+
(RECENT_LIVENESS_MS / 1000) + 's. Treating as recovery storm, not exiting. ' +
|
|
1052
|
+
'Resetting rejection window so a real subsequent cascade can still trip the trap.');
|
|
1053
|
+
_rejectionTimestamps = [];
|
|
1054
|
+
return;
|
|
1055
|
+
}
|
|
1056
|
+
console.error('[FATAL] ' + _rejectionTimestamps.length + ' unhandled rejections within ' + (REJECTION_WINDOW_MS / 1000) + 's and no recent heartbeat activity. Exiting to avoid corrupt state.');
|
|
306
1057
|
releaseLock();
|
|
307
1058
|
process.exit(1);
|
|
308
1059
|
}
|
|
@@ -508,10 +1259,29 @@ async function main() {
|
|
|
508
1259
|
if (consent.enabled) {
|
|
509
1260
|
const hubUrl = process.env.A2A_HUB_URL || process.env.EVOMAP_HUB_URL || '';
|
|
510
1261
|
if (hubUrl) {
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
1262
|
+
// Round-5: previously this bare start() call was a true
|
|
1263
|
+
// fire-and-forget. If autoBuyer.start returned a rejected
|
|
1264
|
+
// promise (transient hub error, bad config, mid-wake DNS
|
|
1265
|
+
// flap), the unhandledRejection escaped to the
|
|
1266
|
+
// process-level handler -- which, post round-3, only
|
|
1267
|
+
// exits if heartbeat is also dead. Net effect: daemon
|
|
1268
|
+
// stays alive but the autobuyer is half-initialized and
|
|
1269
|
+
// silently ignores claims. Attach a catch so the
|
|
1270
|
+
// operator can see the failure and the daemon-survival
|
|
1271
|
+
// gate is not relied on.
|
|
1272
|
+
try {
|
|
1273
|
+
const _autoBuyerPromise = autoBuyer.start({
|
|
1274
|
+
dailyCap: Number(process.env.ATP_AUTOBUY_DAILY_CAP_CREDITS) || undefined,
|
|
1275
|
+
perOrderCap: Number(process.env.ATP_AUTOBUY_PER_ORDER_CAP_CREDITS) || undefined,
|
|
1276
|
+
});
|
|
1277
|
+
if (_autoBuyerPromise && typeof _autoBuyerPromise.catch === 'function') {
|
|
1278
|
+
_autoBuyerPromise.catch(function (abErr) {
|
|
1279
|
+
console.warn('[ATP-AutoBuyer] start() rejected: ' + (abErr && abErr.message || abErr));
|
|
1280
|
+
});
|
|
1281
|
+
}
|
|
1282
|
+
} catch (abSyncErr) {
|
|
1283
|
+
console.warn('[ATP-AutoBuyer] start() threw synchronously: ' + (abSyncErr && abSyncErr.message || abSyncErr));
|
|
1284
|
+
}
|
|
515
1285
|
if (consent.source === 'default') {
|
|
516
1286
|
// First-run on a non-TTY (daemon, hook, CI) where the prompt
|
|
517
1287
|
// could not fire AND no env override + no ack file. autoBuyer
|
|
@@ -538,9 +1308,19 @@ async function main() {
|
|
|
538
1308
|
const hubUrl = process.env.A2A_HUB_URL || process.env.EVOMAP_HUB_URL || '';
|
|
539
1309
|
if (hubUrl) {
|
|
540
1310
|
const autoDeliver = require('./src/atp/autoDeliver');
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
1311
|
+
// Round-5: same fire-and-forget hardening as autoBuyer above.
|
|
1312
|
+
try {
|
|
1313
|
+
const _autoDeliverPromise = autoDeliver.start({
|
|
1314
|
+
pollMs: Number(process.env.ATP_AUTODELIVER_POLL_MS) || undefined,
|
|
1315
|
+
});
|
|
1316
|
+
if (_autoDeliverPromise && typeof _autoDeliverPromise.catch === 'function') {
|
|
1317
|
+
_autoDeliverPromise.catch(function (adErr) {
|
|
1318
|
+
console.warn('[ATP-AutoDeliver] start() rejected: ' + (adErr && adErr.message || adErr));
|
|
1319
|
+
});
|
|
1320
|
+
}
|
|
1321
|
+
} catch (adSyncErr) {
|
|
1322
|
+
console.warn('[ATP-AutoDeliver] start() threw synchronously: ' + (adSyncErr && adSyncErr.message || adSyncErr));
|
|
1323
|
+
}
|
|
544
1324
|
} else {
|
|
545
1325
|
console.warn('[ATP-AutoDeliver] autodeliver enabled but no hub URL configured, skipping.');
|
|
546
1326
|
}
|
|
@@ -1117,6 +1897,10 @@ async function main() {
|
|
|
1117
1897
|
|
|
1118
1898
|
try {
|
|
1119
1899
|
if (!getHubNodeSecret()) {
|
|
1900
|
+
// Round-7 (§20.7): if a daemon is up and we have no secret, we
|
|
1901
|
+
// would race the daemon's hello and silently corrupt its
|
|
1902
|
+
// node_secret. Refuse cleanly with a hint instead.
|
|
1903
|
+
refuseHelloIfDaemonRunning('fetch');
|
|
1120
1904
|
console.log('[fetch] No node_secret found. Sending hello to Hub to register...');
|
|
1121
1905
|
const helloResult = await sendHelloToHub();
|
|
1122
1906
|
if (!helloResult || !helloResult.ok) {
|
|
@@ -1324,6 +2108,9 @@ async function main() {
|
|
|
1324
2108
|
|
|
1325
2109
|
try {
|
|
1326
2110
|
if (!getHubNodeSecret()) {
|
|
2111
|
+
// Round-7 (§20.7): refuse a fresh hello if a live daemon owns
|
|
2112
|
+
// the lock; the daemon's secret will appear shortly.
|
|
2113
|
+
refuseHelloIfDaemonRunning('sync');
|
|
1327
2114
|
console.log('[sync] No node_secret found. Sending hello to Hub to register...');
|
|
1328
2115
|
const helloResult = await sendHelloToHub();
|
|
1329
2116
|
if (!helloResult || !helloResult.ok) {
|
|
@@ -1750,7 +2537,13 @@ async function main() {
|
|
|
1750
2537
|
// we just print the unset hint)
|
|
1751
2538
|
const path = require('path');
|
|
1752
2539
|
const fs = require('fs');
|
|
1753
|
-
|
|
2540
|
+
// Honor an explicit HOME override (used by tests to redirect to a fake
|
|
2541
|
+
// home) before falling back to os.homedir(). On POSIX, os.homedir() also
|
|
2542
|
+
// reads $HOME first, so this is a no-op in practice on macOS/Linux. On
|
|
2543
|
+
// Windows, os.homedir() reads %USERPROFILE% and ignores HOME -- without
|
|
2544
|
+
// this fallback, test/resetLocalSecret.test.js cannot inject a fake home
|
|
2545
|
+
// and the reset operates on the real user dir.
|
|
2546
|
+
const home = process.env.HOME || os.homedir();
|
|
1754
2547
|
const stateFile = path.join(home, '.evomap', 'mailbox', 'state.json');
|
|
1755
2548
|
const legacyFile = path.join(home, '.evomap', 'node_secret');
|
|
1756
2549
|
let cleared = 0;
|
|
@@ -1796,6 +2589,18 @@ async function main() {
|
|
|
1796
2589
|
// Invoked by a spawned Cursor sub-session after it has written the ATP
|
|
1797
2590
|
// task answer to a file. Drives publish -> task/complete -> atp/deliver.
|
|
1798
2591
|
try {
|
|
2592
|
+
// Round-8 (§21.8): if a daemon is up and the spawned subsession
|
|
2593
|
+
// somehow has no secret on disk, the inner completeAtpTask ->
|
|
2594
|
+
// _ensureNodeSecret -> sendHelloToHub call would race the
|
|
2595
|
+
// daemon's hello and silently corrupt the daemon's node_secret
|
|
2596
|
+
// (same vector round-7 §20.7 closed for fetch/sync). In the
|
|
2597
|
+
// common happy path the daemon already registered, the secret
|
|
2598
|
+
// exists, the guard is a no-op. Imported lazily so the helper
|
|
2599
|
+
// resolution does not slow down unrelated subcommands.
|
|
2600
|
+
try {
|
|
2601
|
+
const { getHubNodeSecret } = require('./src/gep/a2aProtocol');
|
|
2602
|
+
if (!getHubNodeSecret()) refuseHelloIfDaemonRunning('atp-complete');
|
|
2603
|
+
} catch (_) { /* never block ATP completion on a guard error */ }
|
|
1799
2604
|
const subArgs = args.slice(1);
|
|
1800
2605
|
function flag(name) {
|
|
1801
2606
|
const pref = '--' + name + '=';
|
|
@@ -1833,6 +2638,16 @@ async function main() {
|
|
|
1833
2638
|
|
|
1834
2639
|
} else if (command === 'buy' || command === 'orders' || command === 'verify' || command === 'atp') {
|
|
1835
2640
|
try {
|
|
2641
|
+
// Round-8 (§21.8): same daemon-vs-CLI race protection as fetch/sync
|
|
2642
|
+
// and atp-complete. The ATP runners (consumerAgent / merchantAgent
|
|
2643
|
+
// / atpExecute) all call sendHelloToHub when getHubNodeSecret() is
|
|
2644
|
+
// empty, which clobbers a running daemon's secret and silences it
|
|
2645
|
+
// for 30 min..4 h. The check is a no-op when a secret already
|
|
2646
|
+
// exists (the common case once the daemon has registered).
|
|
2647
|
+
try {
|
|
2648
|
+
const { getHubNodeSecret } = require('./src/gep/a2aProtocol');
|
|
2649
|
+
if (!getHubNodeSecret()) refuseHelloIfDaemonRunning(command);
|
|
2650
|
+
} catch (_) { /* never block ATP CLI on a guard error */ }
|
|
1836
2651
|
const atpCli = require('./src/atp/cli');
|
|
1837
2652
|
const subArgs = args.slice(1); // drop the command token (e.g. "buy") itself
|
|
1838
2653
|
let parsed;
|