@evomap/evolver 1.87.4 → 1.88.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +934 -33
- package/package.json +1 -1
- package/scripts/build_binaries.js +11 -1
- package/src/adapters/hookAdapter.js +3 -1
- package/src/adapters/scripts/_runtimePaths.js +24 -0
- package/src/adapters/scripts/evolver-session-end.js +110 -78
- package/src/adapters/scripts/evolver-session-start.js +100 -0
- package/src/config.js +43 -8
- package/src/evolve/guards.js +1 -1
- package/src/evolve/pipeline/collect.js +1 -1
- package/src/evolve/pipeline/dispatch.js +1 -1
- package/src/evolve/pipeline/enrich.js +1 -1
- package/src/evolve/pipeline/hub.js +1 -1
- package/src/evolve/pipeline/select.js +1 -1
- package/src/evolve/pipeline/signals.js +1 -1
- package/src/evolve/utils.js +1 -1
- package/src/evolve.js +1 -1
- package/src/forceUpdate.js +42 -21
- package/src/gep/a2aProtocol.js +1 -1
- package/src/gep/assetStore.js +40 -0
- package/src/gep/autoDistillConv.js +1 -0
- package/src/gep/autoDistillLlm.js +1 -0
- package/src/gep/bridge.js +69 -2
- package/src/gep/candidateEval.js +1 -1
- package/src/gep/candidates.js +1 -1
- package/src/gep/contentHash.js +1 -1
- package/src/gep/conversationSniffer.js +1 -0
- package/src/gep/crypto.js +1 -1
- package/src/gep/curriculum.js +1 -1
- package/src/gep/deviceId.js +1 -1
- package/src/gep/envFingerprint.js +1 -1
- package/src/gep/epigenetics.js +1 -1
- package/src/gep/execBridge.js +1 -0
- package/src/gep/explore.js +1 -1
- package/src/gep/featureFlags.js +4 -0
- package/src/gep/gitOps.js +7 -2
- package/src/gep/hash.js +1 -1
- package/src/gep/hubFetch.js +1 -1
- package/src/gep/hubReview.js +1 -1
- package/src/gep/hubSearch.js +1 -1
- package/src/gep/hubVerify.js +1 -1
- package/src/gep/idleScheduler.js +78 -0
- package/src/gep/learningSignals.js +1 -1
- package/src/gep/mailboxTransport.js +34 -0
- package/src/gep/memoryGraph.js +1 -1
- package/src/gep/memoryGraphAdapter.js +1 -1
- package/src/gep/mutation.js +1 -1
- package/src/gep/narrativeMemory.js +1 -1
- package/src/gep/openPRRegistry.js +1 -1
- package/src/gep/paths.js +16 -2
- package/src/gep/personality.js +1 -1
- package/src/gep/policyCheck.js +1 -1
- package/src/gep/prompt.js +1 -1
- package/src/gep/recallVerifier.js +1 -1
- package/src/gep/reflection.js +1 -1
- package/src/gep/selector.js +1 -1
- package/src/gep/skillDistiller.js +1 -1
- package/src/gep/solidify.js +1 -1
- package/src/gep/strategy.js +1 -1
- package/src/gep/validator/index.js +46 -1
- package/src/gep/validator/sandboxExecutor.js +10 -1
- package/src/gep/validator/stakeBootstrap.js +3 -0
- package/src/gep/workspaceKeychain.js +1 -1
- package/src/ops/lifecycle.js +79 -10
- package/src/ops/skills_monitor.js +2 -1
- package/src/proxy/index.js +31 -6
- package/src/proxy/lifecycle/manager.js +77 -4
- package/src/proxy/mailbox/store.js +52 -2
- package/src/proxy/server/settings.js +16 -2
- package/src/proxy/sync/inbound.js +14 -1
package/src/proxy/index.js
CHANGED
|
@@ -174,12 +174,31 @@ class EvoMapProxy {
|
|
|
174
174
|
|
|
175
175
|
async stop() {
|
|
176
176
|
if (!this._started) return;
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
177
|
+
// Tear down in deliberate reverse-of-start order, but don't let one
|
|
178
|
+
// failing step abort the rest: a thrown sync.stop() must not leave the
|
|
179
|
+
// HTTP server and store leaked. Each step is isolated; failures are
|
|
180
|
+
// warned and collected so shutdown always completes.
|
|
181
|
+
const steps = [
|
|
182
|
+
['sync', () => this.sync?.stop()],
|
|
183
|
+
['heartbeat', () => this.lifecycle?.stopHeartbeatLoop()],
|
|
184
|
+
['server', () => this.server?.stop()],
|
|
185
|
+
['store', () => this.store?.close()],
|
|
186
|
+
];
|
|
187
|
+
const errors = [];
|
|
188
|
+
for (const [name, fn] of steps) {
|
|
189
|
+
try {
|
|
190
|
+
await fn();
|
|
191
|
+
} catch (err) {
|
|
192
|
+
errors.push(err);
|
|
193
|
+
this.logger.warn('[proxy] error stopping ' + name + ': ' + (err && err.message ? err.message : err));
|
|
194
|
+
}
|
|
195
|
+
}
|
|
181
196
|
this._started = false;
|
|
182
|
-
|
|
197
|
+
if (errors.length) {
|
|
198
|
+
this.logger.log('[proxy] stopped with ' + errors.length + ' teardown error(s)');
|
|
199
|
+
} else {
|
|
200
|
+
this.logger.log('[proxy] stopped');
|
|
201
|
+
}
|
|
183
202
|
}
|
|
184
203
|
|
|
185
204
|
get mailbox() {
|
|
@@ -547,7 +566,13 @@ class EvoMapProxy {
|
|
|
547
566
|
headers: this.lifecycle._buildHeaders(),
|
|
548
567
|
signal: AbortSignal.timeout(10_000),
|
|
549
568
|
});
|
|
550
|
-
if (!res.ok)
|
|
569
|
+
if (!res.ok) {
|
|
570
|
+
// Drain body so undici can recycle the socket back to the pool.
|
|
571
|
+
// Without this, repeated non-ok responses leak pool slots and
|
|
572
|
+
// eventually starve the dispatcher.
|
|
573
|
+
try { res.body?.cancel?.().catch(() => {}); } catch {}
|
|
574
|
+
return { error: `Hub ${res.status}` };
|
|
575
|
+
}
|
|
551
576
|
return res.json();
|
|
552
577
|
} catch (err) {
|
|
553
578
|
return { error: err.message };
|
|
@@ -23,12 +23,30 @@ const HEARTBEAT_BACKOFF_CAP_MS = 15 * 60_000;
|
|
|
23
23
|
const HELLO_TIMEOUT = 15_000;
|
|
24
24
|
const HEARTBEAT_TIMEOUT = 10_000;
|
|
25
25
|
const MAX_REAUTH_ATTEMPTS = 2;
|
|
26
|
-
// First failure =
|
|
27
|
-
//
|
|
28
|
-
//
|
|
29
|
-
|
|
26
|
+
// First failure = 2 min, subsequent consecutive failures double up to ~4h.
|
|
27
|
+
// Aligned with a2aProtocol.js Round-9 reduction (was 30 min, caused
|
|
28
|
+
// "idle-death" for proxy-mode users: one benign 401 silenced the node for 30
|
|
29
|
+
// min, triggering stagnation kills and manual restart loops).
|
|
30
|
+
const REAUTH_BACKOFF_BASE_MS = 2 * 60_000;
|
|
30
31
|
const REAUTH_BACKOFF_MAX_MS = 4 * 60 * 60_000;
|
|
31
32
|
|
|
33
|
+
// Wall-clock drift detector tunables. Mirrors DRIFT_CHECK_MS /
|
|
34
|
+
// DRIFT_SLEEP_THRESHOLD_MS / DRIFT_LONG_SLEEP_THRESHOLD_MS in
|
|
35
|
+
// src/gep/a2aProtocol.js. setTimeout / setInterval fire on libuv's
|
|
36
|
+
// monotonic clock, which freezes while the host is suspended -- so a
|
|
37
|
+
// laptop closed for hours and reopened would not trigger any heartbeat
|
|
38
|
+
// tick until the next scheduled time, which under exponential backoff
|
|
39
|
+
// can sit at HEARTBEAT_BACKOFF_CAP_MS (15 min). Sampling Date.now()
|
|
40
|
+
// (wall clock) every DRIFT_CHECK_MS lets us detect the jump and
|
|
41
|
+
// immediately poke the heartbeat so recovery does not have to wait for
|
|
42
|
+
// the next natural tick. Long-sleep gap also clears reauth backoff:
|
|
43
|
+
// hub-side state we cached is almost certainly stale after a 30min+
|
|
44
|
+
// suspend, so force a clean retry path on wake instead of carrying the
|
|
45
|
+
// pre-sleep penalty through. R10 (#544).
|
|
46
|
+
const DRIFT_CHECK_MS = 30 * 1000;
|
|
47
|
+
const DRIFT_SLEEP_THRESHOLD_MS = 90 * 1000;
|
|
48
|
+
const DRIFT_LONG_SLEEP_THRESHOLD_MS = 30 * 60_000;
|
|
49
|
+
|
|
32
50
|
let _cachedFingerprint = null;
|
|
33
51
|
function _getEnvFingerprint() {
|
|
34
52
|
if (_cachedFingerprint) return _cachedFingerprint;
|
|
@@ -101,6 +119,8 @@ class LifecycleManager {
|
|
|
101
119
|
this._helloRateLimitUntil = 0;
|
|
102
120
|
this._reauthBackoffUntil = 0;
|
|
103
121
|
this._consecutiveReauthFailures = 0;
|
|
122
|
+
this._driftInterval = null;
|
|
123
|
+
this._lastDriftCheckAt = 0;
|
|
104
124
|
}
|
|
105
125
|
|
|
106
126
|
get nodeId() {
|
|
@@ -510,6 +530,55 @@ class LifecycleManager {
|
|
|
510
530
|
// to schedule its own next timer (a fresher path already owns it).
|
|
511
531
|
this._heartbeatGen = (this._heartbeatGen || 0) + 1;
|
|
512
532
|
this._heartbeatTick(this._heartbeatGen);
|
|
533
|
+
this._startDriftDetector();
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
// Sample wall-clock every DRIFT_CHECK_MS so macOS sleep / hypervisor
|
|
537
|
+
// pause / debugger break is detected and the heartbeat loop is poked
|
|
538
|
+
// back into action without waiting for the (possibly 15-min) backoff
|
|
539
|
+
// timer to fire on libuv's monotonic clock. R10 (#544).
|
|
540
|
+
_startDriftDetector() {
|
|
541
|
+
if (this._driftInterval) return;
|
|
542
|
+
this._lastDriftCheckAt = Date.now();
|
|
543
|
+
this._driftInterval = setInterval(() => {
|
|
544
|
+
// Wrap the whole body in try/catch -- this is a setInterval
|
|
545
|
+
// callback; any throw escaping it kills the detector itself,
|
|
546
|
+
// which is the bug we're protecting against.
|
|
547
|
+
try {
|
|
548
|
+
if (!this._running) return;
|
|
549
|
+
const now = Date.now();
|
|
550
|
+
const gap = now - this._lastDriftCheckAt;
|
|
551
|
+
this._lastDriftCheckAt = now;
|
|
552
|
+
if (gap > DRIFT_SLEEP_THRESHOLD_MS) {
|
|
553
|
+
try {
|
|
554
|
+
this.logger.warn(
|
|
555
|
+
`[lifecycle] wall-clock jump detected (+${Math.round(gap / 1000)}s); ` +
|
|
556
|
+
'likely sleep/wake or process suspension, poking heartbeat'
|
|
557
|
+
);
|
|
558
|
+
} catch (_) { /* logger broken; detector must still poke */ }
|
|
559
|
+
// Long-sleep recovery: the hub-side cached state we carried
|
|
560
|
+
// through the suspend is almost certainly stale. Clear reauth
|
|
561
|
+
// backoff so the next tick can try a clean recovery path
|
|
562
|
+
// instead of sitting out a pre-sleep penalty for up to 4h.
|
|
563
|
+
if (gap > DRIFT_LONG_SLEEP_THRESHOLD_MS) {
|
|
564
|
+
this._consecutiveReauthFailures = 0;
|
|
565
|
+
this._reauthBackoffUntil = 0;
|
|
566
|
+
try {
|
|
567
|
+
this.logger.warn(
|
|
568
|
+
`[lifecycle] long sleep (+${Math.round(gap / 60_000)}min) cleared reauth backoff`
|
|
569
|
+
);
|
|
570
|
+
} catch (_) { /* logger broken; non-fatal */ }
|
|
571
|
+
}
|
|
572
|
+
this.pokeHeartbeatLoop();
|
|
573
|
+
}
|
|
574
|
+
} catch (err) {
|
|
575
|
+
try { this.logger.error(`[lifecycle] drift detector threw: ${err && err.message}`); }
|
|
576
|
+
catch (_) { /* never let the detector escape */ }
|
|
577
|
+
}
|
|
578
|
+
}, DRIFT_CHECK_MS);
|
|
579
|
+
// Don't keep the event loop alive on behalf of the detector alone --
|
|
580
|
+
// matches the unref() used on _heartbeatTimer.
|
|
581
|
+
if (this._driftInterval.unref) this._driftInterval.unref();
|
|
513
582
|
}
|
|
514
583
|
|
|
515
584
|
async _heartbeatTick(myGen) {
|
|
@@ -574,6 +643,10 @@ class LifecycleManager {
|
|
|
574
643
|
clearTimeout(this._heartbeatTimer);
|
|
575
644
|
this._heartbeatTimer = null;
|
|
576
645
|
}
|
|
646
|
+
if (this._driftInterval) {
|
|
647
|
+
clearInterval(this._driftInterval);
|
|
648
|
+
this._driftInterval = null;
|
|
649
|
+
}
|
|
577
650
|
}
|
|
578
651
|
|
|
579
652
|
_shouldUpgrade(minVersion) {
|
|
@@ -68,6 +68,27 @@ function safeParse(payload) {
|
|
|
68
68
|
try { return JSON.parse(payload); } catch { return payload; }
|
|
69
69
|
}
|
|
70
70
|
|
|
71
|
+
// Round-9: the round-8 cross-process append lock (§21.7) was REMOVED.
|
|
72
|
+
// Its premise -- that fs.appendFileSync to a regular file can interleave
|
|
73
|
+
// bytes mid-line unless each write stays under PIPE_BUF (512 B darwin,
|
|
74
|
+
// 4096 B linux) -- conflated two different POSIX guarantees. PIPE_BUF
|
|
75
|
+
// atomicity is defined for PIPES/FIFOs, not regular files. A single
|
|
76
|
+
// O_APPEND write() to a regular file is positioned atomically at EOF and
|
|
77
|
+
// is not interleaved with other appenders on the local filesystems evolver
|
|
78
|
+
// uses (~/.evomap); this was verified empirically on darwin/APFS --
|
|
79
|
+
// concurrent 4 KB..1 MB appends from 6 writers produced zero torn lines.
|
|
80
|
+
// So the lock guarded a non-problem. Worse, its 5 s deadline with a
|
|
81
|
+
// busy-wait (Atomics.wait, then a spin-loop fallback) ran on the single
|
|
82
|
+
// JS thread, so under any real contention it BLOCKED the event loop --
|
|
83
|
+
// starving the very heartbeat/SSE/HTTP it shared the process with, i.e.
|
|
84
|
+
// it could itself produce the "process alive but inert" symptom it claimed
|
|
85
|
+
// to prevent. fs.appendFileSync writes the whole buffer with O_APPEND, so
|
|
86
|
+
// a single record lands as one atomic append.
|
|
87
|
+
//
|
|
88
|
+
// Windows note: PIPE_BUF is a POSIX concept; it does not exist on Windows.
|
|
89
|
+
// Windows NTFS provides the same atomicity guarantee for O_APPEND writes to
|
|
90
|
+
// regular files that POSIX local filesystems do, so the removal above is
|
|
91
|
+
// equally valid on Windows. No platform-specific code is needed here.
|
|
71
92
|
/**
 * Append one record to a JSON-lines file.
 *
 * The value is serialized with JSON.stringify, terminated with a newline and
 * handed to a single synchronous append call.
 *
 * @param {string} filePath - File to append to.
 * @param {*} obj - Value to serialize as one line.
 */
function appendLine(filePath, obj) {
  const record = `${JSON.stringify(obj)}\n`;
  fs.appendFileSync(filePath, record, 'utf8');
}
|
|
@@ -135,8 +156,27 @@ class MailboxStore {
|
|
|
135
156
|
_persistState() {
|
|
136
157
|
const dir = path.dirname(this._stateFile);
|
|
137
158
|
if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
|
|
138
|
-
|
|
159
|
+
// Round-7 (§20.5): per-PID tmp path. Two evolver processes (daemon +
|
|
160
|
+
// ad-hoc CLI / proxy + loop) writing to the same `${stateFile}.tmp`
|
|
161
|
+
// would otherwise interleave: process B's writeFileSync truncates
|
|
162
|
+
// A's tmp mid-write, then B's rename completes with B's truncated
|
|
163
|
+
// payload as the final state.json. `state.json` holds the cached
|
|
164
|
+
// node_secret after a hub rotation -- a torn write here is the
|
|
165
|
+
// load-bearing trigger for the "401-loop -> reauth backoff -> dead
|
|
166
|
+
// for 30 min..4 h" symptom this branch targets. Matches the
|
|
167
|
+
// precedent set by _persistNodeSecret in src/gep/a2aProtocol.js.
|
|
168
|
+
const tmp = `${this._stateFile}.${process.pid}.tmp`;
|
|
139
169
|
fs.writeFileSync(tmp, JSON.stringify(this._state, null, 2) + '\n', 'utf8');
|
|
170
|
+
// Windows: fs.renameSync throws EPERM when the destination file already
|
|
171
|
+
// exists, unlike POSIX where rename(2) atomically replaces the target.
|
|
172
|
+
// Remove the destination first so the rename succeeds on all platforms.
|
|
173
|
+
// The window between unlink and rename is intentionally tiny; a crash in
|
|
174
|
+
// that window leaves the tmp file behind (recovered on next _persistState).
|
|
175
|
+
if (process.platform === 'win32') {
|
|
176
|
+
try { fs.unlinkSync(this._stateFile); } catch (e) {
|
|
177
|
+
if (e.code !== 'ENOENT') throw e;
|
|
178
|
+
}
|
|
179
|
+
}
|
|
140
180
|
fs.renameSync(tmp, this._stateFile);
|
|
141
181
|
}
|
|
142
182
|
|
|
@@ -384,7 +424,10 @@ class MailboxStore {
|
|
|
384
424
|
// --- Compaction (reduces JSONL file size by rewriting only current state) ---
|
|
385
425
|
|
|
386
426
|
compact() {
|
|
387
|
-
|
|
427
|
+
// Round-7 (§20.5): same per-PID tmp rationale as _persistState.
|
|
428
|
+
// Two concurrent compact() calls (daemon + ad-hoc CLI) racing on
|
|
429
|
+
// the same `${messagesFile}.tmp` lose the loser's compacted log.
|
|
430
|
+
const tmpFile = `${this._messagesFile}.${process.pid}.tmp`;
|
|
388
431
|
const entries = [];
|
|
389
432
|
for (const [, msg] of this._messages) {
|
|
390
433
|
entries.push(msg);
|
|
@@ -396,6 +439,13 @@ class MailboxStore {
|
|
|
396
439
|
fs.writeSync(fd, JSON.stringify(msg) + '\n');
|
|
397
440
|
}
|
|
398
441
|
fs.closeSync(fd);
|
|
442
|
+
// Windows: renameSync throws EPERM when the destination already exists.
|
|
443
|
+
// Remove it first so the swap succeeds on all platforms.
|
|
444
|
+
if (process.platform === 'win32') {
|
|
445
|
+
try { fs.unlinkSync(this._messagesFile); } catch (e) {
|
|
446
|
+
if (e.code !== 'ENOENT') throw e;
|
|
447
|
+
}
|
|
448
|
+
}
|
|
399
449
|
fs.renameSync(tmpFile, this._messagesFile);
|
|
400
450
|
this._rebuildIndex();
|
|
401
451
|
}
|
|
@@ -37,9 +37,13 @@ function writeSettings(data) {
|
|
|
37
37
|
}
|
|
38
38
|
const current = readSettings();
|
|
39
39
|
const merged = { ...current, ...data };
|
|
40
|
+
// NOTE(windows): mode 0o600 is silently ignored on Windows. The settings
|
|
41
|
+
// file (which may contain proxy credentials) will NOT be owner-read-only.
|
|
42
|
+
// Only Windows user-profile directory ACLs provide isolation. The chmodSync
|
|
43
|
+
// call below is also a no-op on Windows but is retained for Unix correctness.
|
|
40
44
|
fs.writeFileSync(file, JSON.stringify(merged, null, 2), { encoding: 'utf8', mode: 0o600 });
|
|
41
45
|
// mode: 0o600 only applies on creation; explicitly chmod to tighten pre-existing files
|
|
42
|
-
try { fs.chmodSync(file, 0o600); } catch {}
|
|
46
|
+
try { fs.chmodSync(file, 0o600); } catch { /* best-effort; no-op on Windows */ }
|
|
43
47
|
return merged;
|
|
44
48
|
}
|
|
45
49
|
|
|
@@ -59,9 +63,19 @@ function isStaleProxy() {
|
|
|
59
63
|
const pid = settings.proxy?.pid;
|
|
60
64
|
if (!pid) return false;
|
|
61
65
|
try {
|
|
66
|
+
// process.kill(pid, 0) probes whether the process exists without sending a
|
|
67
|
+
// signal. On POSIX it throws ESRCH when the PID is gone. On Windows the
|
|
68
|
+
// Node.js runtime maps this to the same behavior (ESRCH via uv_kill), so
|
|
69
|
+
// the cross-platform semantics are consistent. If the current process does
|
|
70
|
+
// not have permission to query the target PID, EPERM is thrown -- that
|
|
71
|
+
// means the PID exists and is owned by another user, so we treat it as
|
|
72
|
+
// live (not stale) rather than crashing.
|
|
62
73
|
process.kill(pid, 0);
|
|
63
74
|
return false;
|
|
64
|
-
} catch {
|
|
75
|
+
} catch (err) {
|
|
76
|
+
// ESRCH: process does not exist -> stale.
|
|
77
|
+
// EPERM: process exists but is not ours -> not stale (leave settings alone).
|
|
78
|
+
if (err.code === 'EPERM') return false;
|
|
65
79
|
return true;
|
|
66
80
|
}
|
|
67
81
|
}
|
|
@@ -83,12 +83,25 @@ class InboundSync {
|
|
|
83
83
|
|
|
84
84
|
try {
|
|
85
85
|
const senderId = this.store.getState('node_id');
|
|
86
|
-
|
|
86
|
+
// Round-8 (§21.5): drain the response body so the undici long-poll
|
|
87
|
+
// dispatcher pool is not leaked one socket per ack. ackDelivered
|
|
88
|
+
// is called every inbound poll cycle (default 1-10s); the
|
|
89
|
+
// pre-round-8 code captured no reference to res and never called
|
|
90
|
+
// .json()/.text()/body.cancel(), so each ack pinned a socket
|
|
91
|
+
// until GC. After a few minutes of activity the strict-pool was
|
|
92
|
+
// exhausted and proxy-mode heartbeats hung on next acquire --
|
|
93
|
+
// matches the "alive once then dead" user symptom in proxy mode.
|
|
94
|
+
const res = await hubFetch(endpoint, {
|
|
87
95
|
method: 'POST',
|
|
88
96
|
headers: this.getHeaders(),
|
|
89
97
|
body: JSON.stringify({ sender_id: senderId, message_ids: delivered.map(m => m.id) }),
|
|
90
98
|
signal: AbortSignal.timeout(10_000),
|
|
91
99
|
});
|
|
100
|
+
try {
|
|
101
|
+
if (res && res.body && typeof res.body.cancel === 'function') {
|
|
102
|
+
await res.body.cancel().catch(() => {});
|
|
103
|
+
}
|
|
104
|
+
} catch (_) { /* never escape the drain helper */ }
|
|
92
105
|
return { acked: delivered.length };
|
|
93
106
|
} catch (err) {
|
|
94
107
|
this.logger.error(`[inbound] ack failed: ${err.message}`);
|