@evomap/evolver 1.82.0 → 1.83.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +24 -0
- package/package.json +2 -1
- package/scripts/recall-verify-report.js +234 -0
- package/skills/_meta/SKILL.md +41 -0
- package/skills/index.json +14 -0
- package/src/evolve/guards.js +1 -1
- package/src/evolve/pipeline/collect.js +1 -1
- package/src/evolve/pipeline/dispatch.js +1 -1
- package/src/evolve/pipeline/enrich.js +1 -1
- package/src/evolve/pipeline/hub.js +1 -1
- package/src/evolve/pipeline/select.js +1 -1
- package/src/evolve/pipeline/signals.js +1 -1
- package/src/evolve/utils.js +1 -1
- package/src/evolve.js +1 -1
- package/src/forceUpdate.js +50 -16
- package/src/gep/.integrity +0 -0
- package/src/gep/a2aProtocol.js +1 -1
- package/src/gep/candidateEval.js +1 -1
- package/src/gep/candidates.js +1 -1
- package/src/gep/contentHash.js +1 -1
- package/src/gep/crypto.js +1 -1
- package/src/gep/curriculum.js +1 -1
- package/src/gep/deviceId.js +1 -1
- package/src/gep/envFingerprint.js +1 -1
- package/src/gep/epigenetics.js +1 -1
- package/src/gep/explore.js +1 -1
- package/src/gep/hash.js +1 -1
- package/src/gep/hubReview.js +1 -1
- package/src/gep/hubSearch.js +1 -1
- package/src/gep/hubVerify.js +1 -1
- package/src/gep/integrityCheck.js +1 -1
- package/src/gep/learningSignals.js +1 -1
- package/src/gep/memoryGraph.js +1 -1
- package/src/gep/memoryGraphAdapter.js +1 -1
- package/src/gep/mutation.js +1 -1
- package/src/gep/narrativeMemory.js +1 -1
- package/src/gep/openPRRegistry.js +1 -1
- package/src/gep/paths.js +20 -0
- package/src/gep/personality.js +1 -1
- package/src/gep/policyCheck.js +1 -1
- package/src/gep/prompt.js +1 -1
- package/src/gep/recallVerifier.js +1 -0
- package/src/gep/reflection.js +1 -1
- package/src/gep/selector.js +1 -1
- package/src/gep/shield.js +1 -1
- package/src/gep/skill2gep.js +40 -2
- package/src/gep/skillDistiller.js +1 -1
- package/src/gep/solidify.js +1 -1
- package/src/gep/strategy.js +1 -1
package/index.js
CHANGED
|
@@ -349,6 +349,20 @@ async function main() {
|
|
|
349
349
|
}
|
|
350
350
|
} catch (_mirrorDiagErr) { /* diagnostics must never block startup */ }
|
|
351
351
|
|
|
352
|
+
// RecallVerify diagnostic banner: parallel to HubMirror but reads its
|
|
353
|
+
// own env, since verification can run with HubMirror off (verifier
|
|
354
|
+
// events are local-only on first ship).
|
|
355
|
+
try {
|
|
356
|
+
const enabled = String(process.env.EVOLVE_RECALL_VERIFY || '1') !== '0';
|
|
357
|
+
const sampleRateRaw = Number(process.env.EVOLVE_RECALL_VERIFY_SAMPLE_RATE);
|
|
358
|
+
const sampleRate = Number.isFinite(sampleRateRaw) && sampleRateRaw >= 0 && sampleRateRaw <= 1 ? sampleRateRaw : 1.0;
|
|
359
|
+
if (!enabled) {
|
|
360
|
+
console.log('[RecallVerify] DISABLED — set EVOLVE_RECALL_VERIFY=1 to verify published assets round-trip via Hub Phase 2 lookup.');
|
|
361
|
+
} else {
|
|
362
|
+
console.log(`[RecallVerify] ENABLED — verifying published assets via Hub Phase 2 lookup, sample_rate=${sampleRate}. Set EVOLVE_RECALL_VERIFY=0 to disable.`);
|
|
363
|
+
}
|
|
364
|
+
} catch (_rvDiagErr) { /* diagnostics must never block startup */ }
|
|
365
|
+
|
|
352
366
|
const { getEvolutionDir, getEvolverLogPath } = require('./src/gep/paths');
|
|
353
367
|
const solidifyStatePath = path.join(getEvolutionDir(), 'evolution_solidify_state.json');
|
|
354
368
|
const cycleProgressPath = path.join(getEvolutionDir(), 'cycle_progress.json');
|
|
@@ -398,6 +412,16 @@ async function main() {
|
|
|
398
412
|
console.warn('[Heartbeat] Failed to start: ' + (e.message || e));
|
|
399
413
|
}
|
|
400
414
|
|
|
415
|
+
// RecallVerify worker: starts once per process; drains the publish-
|
|
416
|
+
// verification queue with backoff. unref'd so it never blocks exit.
|
|
417
|
+
try {
|
|
418
|
+
if (String(process.env.EVOLVE_RECALL_VERIFY || '1') !== '0') {
|
|
419
|
+
require('./src/gep/recallVerifier').startWorker();
|
|
420
|
+
}
|
|
421
|
+
} catch (rvStartErr) {
|
|
422
|
+
console.warn('[RecallVerify] startWorker failed: ' + (rvStartErr && rvStartErr.message || rvStartErr));
|
|
423
|
+
}
|
|
424
|
+
|
|
401
425
|
// Validator daemon: independent timer that fetches and executes
|
|
402
426
|
// validation tasks regardless of the main evolve loop's idle gating.
|
|
403
427
|
// Honors EVOLVER_VALIDATOR_ENABLED and the persisted feature flag.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@evomap/evolver",
|
|
3
|
-
"version": "1.82.0",
|
|
3
|
+
"version": "1.83.0",
|
|
4
4
|
"description": "A GEP-powered self-evolution engine for AI agents. Features automated log analysis and Genome Evolution Protocol (GEP) for auditable, reusable evolution assets.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"bin": {
|
|
@@ -46,6 +46,7 @@
|
|
|
46
46
|
"index.js",
|
|
47
47
|
"src/",
|
|
48
48
|
"scripts/",
|
|
49
|
+
"skills/",
|
|
49
50
|
"README.md",
|
|
50
51
|
"README.zh-CN.md",
|
|
51
52
|
"README.ja-JP.md",
|
package/scripts/recall-verify-report.js
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
// recall-verify-report — aggregate kind=recall_verify events from the
|
|
5
|
+
// memory graph jsonl into a Markdown table. Exit 0 = ship gate green
|
|
6
|
+
// (every asset_type has success_rate >= 0.95 and 0 mismatches), exit 2
|
|
7
|
+
// otherwise. Designed to be scripted into deploy.sh as a pre-publish gate.
|
|
8
|
+
//
|
|
9
|
+
// Usage:
|
|
10
|
+
// node scripts/recall-verify-report.js # all events
|
|
11
|
+
// node scripts/recall-verify-report.js --since 1h # last hour
|
|
12
|
+
// node scripts/recall-verify-report.js --since 30m
|
|
13
|
+
// node scripts/recall-verify-report.js --since 2026-05-16T10:00:00Z
|
|
14
|
+
// node scripts/recall-verify-report.js --json # raw JSON for piping
|
|
15
|
+
|
|
16
|
+
const { tryReadMemoryGraphEvents } = require('../src/gep/memoryGraph');
|
|
17
|
+
|
|
18
|
+
const SUCCESS_THRESHOLD = 0.95;
|
|
19
|
+
|
|
20
|
+
/**
 * Convert a `--since` argument into an epoch-milliseconds cutoff.
 *
 * Accepted forms (surrounding whitespace is ignored):
 *   - relative duration `<N>s` / `<N>m` / `<N>h` / `<N>d` (case-insensitive,
 *     optional whitespace between number and unit), measured back from now;
 *   - ISO-8601 date or date-time beginning with `YYYY-MM`, e.g. `2026-05-16`
 *     or `2026-05-16T10:00:00Z`.
 *
 * @param {string|null|undefined} value Raw argument value.
 * @returns {number|null|undefined} Epoch milliseconds on success; `null` when
 *   no value was supplied (falsy input); `undefined` when the value cannot be
 *   interpreted, so callers can tell "absent" from "invalid".
 */
function parseSince(value) {
  if (!value) return null;
  const text = String(value).trim();

  // Try relative duration first (1h / 30m / 2d / 45s) — unambiguous.
  const m = text.match(/^(\d+)\s*(s|m|h|d)$/i);
  if (m) {
    const factors = { s: 1000, m: 60000, h: 3600000, d: 86400000 };
    return Date.now() - Number(m[1]) * factors[m[2].toLowerCase()];
  }

  // Then ISO-8601. Require a leading `YYYY-MM` rather than "any '-' or 'T'":
  // Date.parse falls back to an implementation-defined lenient parser for
  // non-ISO text, so inputs such as "-1" or "x-5" could otherwise be accepted
  // as an arbitrary date instead of being rejected.
  if (/^\d{4}-\d{2}/.test(text)) {
    const iso = Date.parse(text);
    if (!Number.isNaN(iso)) return iso;
  }
  return undefined;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Parse the command-line flags understood by this script.
 *
 * Recognised flags: `--json`, `--since <value>`, `--since=<value>`,
 * `--help` / `-h`. Anything else is ignored. Scanning starts at index 2,
 * skipping the first two entries of `argv`.
 *
 * @param {string[]} argv Full argument vector (e.g. `process.argv`).
 * @returns {{since: (string|null|undefined), json: boolean, help: (boolean|undefined)}}
 *   `since` stays `null` when the flag is absent and is `undefined` when
 *   `--since` is the last token; `help` is only present when requested.
 */
function parseArgs(argv) {
  const SINCE_PREFIX = '--since=';
  const parsed = { since: null, json: false };
  let pos = 2;
  while (pos < argv.length) {
    const token = argv[pos];
    switch (token) {
      case '--json':
        parsed.json = true;
        break;
      case '--since':
        // The value is the following token; consume it so it is not
        // re-examined as a flag on the next iteration.
        pos += 1;
        parsed.since = argv[pos];
        break;
      case '--help':
      case '-h':
        parsed.help = true;
        break;
      default:
        if (token.startsWith(SINCE_PREFIX)) {
          parsed.since = token.slice(SINCE_PREFIX.length);
        }
    }
    pos += 1;
  }
  return parsed;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Pick the element at fraction `p` of an ascending-sorted array.
 *
 * The index is `floor(length * p)`, clamped to the last element.
 *
 * @param {number[]} sorted Values already sorted ascending.
 * @param {number} p Fraction such as 0.5 or 0.99.
 * @returns {number} The selected element, or 0 for an empty array.
 */
function percentile(sorted, p) {
  const count = sorted.length;
  if (count === 0) return 0;
  const lastIndex = count - 1;
  const rawIndex = Math.floor(count * p);
  return sorted[rawIndex > lastIndex ? lastIndex : rawIndex];
}
|
|
60
|
+
|
|
61
|
+
/**
 * Roll `recall_verify` events up into per-asset-type rows, grand totals and
 * a ship-gate colour.
 *
 * Events whose `kind` is not `'recall_verify'` (and null entries) are
 * ignored. Each event is bucketed by `ev.asset.type`; a missing/falsy type
 * becomes `'Unknown'`. The type is coerced to a string because events are
 * read from disk and a non-string type would otherwise make the
 * `localeCompare` sort below throw.
 *
 * Outcome counting uses `ev.verification.outcome`: `roundtrip_ok`,
 * `roundtrip_missing` and `roundtrip_mismatch` are counted individually and
 * any other value counts as `skipped`. `success_rate` is
 * `ok / (ok + missing + mismatch)`, or 0 when that denominator is 0.
 *
 * Gate rules:
 *   - no rows at all                      -> RED
 *   - any row with a mismatch             -> RED
 *   - row success_rate < SUCCESS_THRESHOLD -> YELLOW if >= 0.85, else RED
 *   - otherwise                           -> GREEN
 *
 * @param {Iterable<object>} events Memory-graph events.
 * @returns {{rows: object[], totals: object, gate: ('GREEN'|'YELLOW'|'RED')}}
 *   `rows` is sorted by type name; each row carries the counters,
 *   `success_rate` and p50/p95/p99 of `latency_ms` and `age_at_verify_ms`.
 */
function aggregate(events) {
  const byType = new Map();
  for (const ev of events) {
    if (!ev || ev.kind !== 'recall_verify') continue;
    const rawType = ev.asset && ev.asset.type;
    const type = rawType ? String(rawType) : 'Unknown';
    let bucket = byType.get(type);
    if (!bucket) {
      bucket = { type, total: 0, ok: 0, missing: 0, mismatch: 0, skipped: 0, latencies: [], ages: [] };
      byType.set(type, bucket);
    }
    bucket.total += 1;
    const v = ev.verification || {};
    if (v.outcome === 'roundtrip_ok') bucket.ok += 1;
    else if (v.outcome === 'roundtrip_missing') bucket.missing += 1;
    else if (v.outcome === 'roundtrip_mismatch') bucket.mismatch += 1;
    else bucket.skipped += 1;
    if (Number.isFinite(v.latency_ms)) bucket.latencies.push(v.latency_ms);
    if (Number.isFinite(v.age_at_verify_ms)) bucket.ages.push(v.age_at_verify_ms);
  }

  const ascending = (a, b) => a - b;
  const rows = [];
  for (const bucket of byType.values()) {
    const denom = bucket.ok + bucket.missing + bucket.mismatch;
    const latencies = bucket.latencies.sort(ascending);
    const ages = bucket.ages.sort(ascending);
    // Build the public row explicitly so the raw sample arrays never leak
    // into the result (and no `delete` is needed).
    rows.push({
      type: bucket.type,
      total: bucket.total,
      ok: bucket.ok,
      missing: bucket.missing,
      mismatch: bucket.mismatch,
      skipped: bucket.skipped,
      success_rate: denom > 0 ? bucket.ok / denom : 0,
      p50_latency_ms: percentile(latencies, 0.5),
      p95_latency_ms: percentile(latencies, 0.95),
      p99_latency_ms: percentile(latencies, 0.99),
      p50_age_ms: percentile(ages, 0.5),
      p95_age_ms: percentile(ages, 0.95),
      p99_age_ms: percentile(ages, 0.99),
    });
  }
  rows.sort((a, b) => a.type.localeCompare(b.type));

  const totals = { type: 'TOTAL', total: 0, ok: 0, missing: 0, mismatch: 0, skipped: 0 };
  for (const r of rows) {
    totals.total += r.total;
    totals.ok += r.ok;
    totals.missing += r.missing;
    totals.mismatch += r.mismatch;
    totals.skipped += r.skipped;
  }
  const totalsDenom = totals.ok + totals.missing + totals.mismatch;
  totals.success_rate = totalsDenom > 0 ? totals.ok / totalsDenom : 0;

  // Gate severity is monotonic: once a row triggers a worse state, later
  // rows cannot downgrade it. Without this, AntiPattern@0% (RED) followed
  // by Capsule@90% (YELLOW) would report YELLOW — misleading dashboards
  // even though the exit code still reflects RED.
  // RANK is the comparison ordinal: GREEN(0) < YELLOW(1) < RED(2).
  const RANK = { GREEN: 0, YELLOW: 1, RED: 2 };
  const escalate = (current, candidate) => (RANK[candidate] > RANK[current] ? candidate : current);

  let gate = 'GREEN';
  if (rows.length === 0) {
    gate = 'RED';
  } else {
    for (const r of rows) {
      // A mismatch is the worst possible state, so stop scanning.
      if (r.mismatch > 0) { gate = 'RED'; break; }
      if (r.success_rate < SUCCESS_THRESHOLD) {
        gate = escalate(gate, r.success_rate >= 0.85 ? 'YELLOW' : 'RED');
      }
    }
  }
  return { rows, totals, gate };
}
|
|
138
|
+
|
|
139
|
+
/**
 * Render a fraction as a percentage with one decimal place.
 *
 * @param {number} rate Fraction, e.g. 0.95.
 * @returns {string} Percentage text such as `95.0%`.
 */
function fmtPct(rate) {
  const percent = rate * 100;
  return `${percent.toFixed(1)}%`;
}
|
|
142
|
+
|
|
143
|
+
/**
 * Render a millisecond duration for the report table.
 *
 * @param {number} n Duration in milliseconds.
 * @returns {string} `—` for non-finite or zero input, `<N>ms` below one
 *   second, otherwise seconds with one decimal (`1.5s`).
 */
function fmtMs(n) {
  if (!Number.isFinite(n) || n === 0) return '—';
  // Round before picking the unit: deciding on the unrounded value made
  // inputs in [999.5, 1000) print as "1000ms" instead of "1.0s".
  const rounded = Math.round(n);
  if (rounded < 1000) return rounded + 'ms';
  return (n / 1000).toFixed(1) + 's';
}
|
|
148
|
+
|
|
149
|
+
/**
 * Write the aggregated report to stdout as a Markdown table followed by the
 * ship-gate line.
 *
 * When there are no rows, a "no data" notice and a RED gate line are printed
 * instead of the table.
 *
 * @param {{rows: object[], totals: object, gate: string}} result Output of
 *   `aggregate`.
 * @param {number|null} since Cutoff in epoch ms; a falsy value is shown as
 *   "all time".
 * @returns {void}
 */
function printMarkdown(result, since) {
  const sinceLabel = since ? new Date(since).toISOString() : 'all time';
  console.log('# RecallVerify Report (since ' + sinceLabel + ')');
  console.log('');

  if (result.rows.length === 0) {
    console.log('_No `recall_verify` events found in memory graph._');
    console.log('');
    console.log('Ship gate: **RED** (no data — feature may be disabled or daemon has not run a publish cycle yet)');
    return;
  }

  // Every table line has the same frame: cells joined by " | " between a
  // leading "| " and a trailing " |".
  const tableLine = (cells) => '| ' + cells.join(' | ') + ' |';
  // Counter columns shared by the per-type rows and the TOTAL row.
  const counterCells = (entry) => [
    String(entry.total).padStart(5),
    String(entry.ok).padStart(3),
    String(entry.missing).padStart(7),
    String(entry.mismatch).padStart(8),
    String(entry.skipped).padStart(7),
    fmtPct(entry.success_rate).padStart(12),
  ];

  console.log('| asset_type | total | ok | missing | mismatch | skipped | success_rate | p50_latency | p99_latency | p50_age | p99_age |');
  console.log('|--------------|------:|----:|--------:|---------:|--------:|-------------:|------------:|------------:|--------:|--------:|');

  for (const row of result.rows) {
    console.log(tableLine([
      row.type.padEnd(12),
      ...counterCells(row),
      fmtMs(row.p50_latency_ms).padStart(11),
      fmtMs(row.p99_latency_ms).padStart(11),
      fmtMs(row.p50_age_ms).padStart(7),
      fmtMs(row.p99_age_ms).padStart(7),
    ]));
  }

  // Percentiles are not aggregated across types, so TOTAL shows dashes.
  console.log(tableLine([
    'TOTAL'.padEnd(12),
    ...counterCells(result.totals),
    '—'.padStart(11),
    '—'.padStart(11),
    '—'.padStart(7),
    '—'.padStart(7),
  ]));
  console.log('');

  const exitNote = result.gate === 'GREEN' ? ' (exit 0)' : ' (exit 2)';
  console.log('Ship gate: **' + result.gate + '**' + exitNote);
}
|
|
191
|
+
|
|
192
|
+
/**
 * CLI entry point: read memory-graph events, keep the `recall_verify` ones
 * (optionally only those at or after `--since`), print the report as Markdown
 * or JSON, and exit with the ship-gate status.
 *
 * Exit codes: 0 = gate GREEN (or `--help`), 1 = bad `--since` argument,
 * 2 = gate not GREEN.
 *
 * @returns {void} Does not return normally; always ends via `process.exit`.
 */
function main() {
  const args = parseArgs(process.argv);
  if (args.help) {
    console.log('Usage: node scripts/recall-verify-report.js [--since <Nh|Nm|Nd|ISO>] [--json]');
    process.exit(0);
  }

  let sinceMs = null;
  // parseArgs leaves `since` at null only when the flag is absent. A bare
  // `--since` (undefined) or `--since=` (empty string) used to be treated as
  // "no filter" and silently produced an all-time report; reject it instead.
  if (args.since !== null) {
    const parsed = args.since ? parseSince(args.since) : undefined;
    if (parsed === undefined || parsed === null) {
      console.error('Error: --since must be ISO-8601 or a duration like 1h / 30m / 2d (got: ' + (args.since || '<empty>') + ')');
      process.exit(1);
    }
    sinceMs = parsed;
  }

  // Normalise an event timestamp to epoch ms. Numeric values pass through.
  // NOTE(review): date strings are also accepted in case events store `ts`
  // as ISO text — confirm against the event writer. Anything unreadable
  // yields null and the event is kept, as before.
  const toEpochMs = (ts) => {
    if (Number.isFinite(ts)) return ts;
    if (typeof ts === 'string') {
      const parsedTs = Date.parse(ts);
      return Number.isNaN(parsedTs) ? null : parsedTs;
    }
    return null;
  };

  const allEvents = tryReadMemoryGraphEvents(20000);
  const filtered = allEvents.filter((ev) => {
    if (!ev || ev.kind !== 'recall_verify') return false;
    if (sinceMs === null) return true;
    const tsMs = toEpochMs(ev.ts);
    return tsMs === null || tsMs >= sinceMs;
  });

  const result = aggregate(filtered);

  if (args.json) {
    console.log(JSON.stringify({
      since: sinceMs ? new Date(sinceMs).toISOString() : null,
      ...result,
    }, null, 2));
  } else {
    printMarkdown(result, sinceMs);
  }

  process.exit(result.gate === 'GREEN' ? 0 : 2);
}
|
|
229
|
+
|
|
230
|
+
if (require.main === module) {
|
|
231
|
+
main();
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
module.exports = { aggregate, parseSince, parseArgs };
|
package/skills/_meta/SKILL.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: _meta
|
|
3
|
+
version: 0.1.0
|
|
4
|
+
description: Bootstrap skill that teaches the agent how to discover and load other skills on demand via gep_list_skill / gep_load_skill.
|
|
5
|
+
tags: meta, bootstrap, evolver
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# On-demand skill loading
|
|
9
|
+
|
|
10
|
+
Evolver ships a library of skills (markdown playbooks under `skills/`). To
|
|
11
|
+
keep your starting context small, only this meta-skill is injected by
|
|
12
|
+
default. Pull in additional skills when you actually need them.
|
|
13
|
+
|
|
14
|
+
## Tools
|
|
15
|
+
|
|
16
|
+
- `gep_list_skill` — see what's available.
|
|
17
|
+
- `source`: `bundled` (shipped with evolver), `local` (`~/.claude/skills/`),
|
|
18
|
+
`hub` (community), or `all` (default).
|
|
19
|
+
- `query`: optional substring filter on name / description / tags.
|
|
20
|
+
- `gep_load_skill` — fetch one skill's content.
|
|
21
|
+
- `name`: the skill name (use `<source>:<name>` to disambiguate collisions).
|
|
22
|
+
- `install` (default `false`): if `true`, copy the skill directory to
|
|
23
|
+
`~/.claude/skills/<name>/` so the native Skill tool can invoke it later.
|
|
24
|
+
Local mode only. Use `force: true` to overwrite an existing local copy.
|
|
25
|
+
|
|
26
|
+
## When to load vs. install
|
|
27
|
+
|
|
28
|
+
- **Load** (default) when you need the skill *for this turn*. The SKILL.md
|
|
29
|
+
text comes back as a tool result; you read it and act. No filesystem side
|
|
30
|
+
effect.
|
|
31
|
+
- **Install** when the user wants the skill persisted for future Claude Code
|
|
32
|
+
sessions, or when the same skill will be invoked many times across a long
|
|
33
|
+
task.
|
|
34
|
+
|
|
35
|
+
## Heuristics
|
|
36
|
+
|
|
37
|
+
- Before starting a non-trivial task, call `gep_list_skill` once. If a name
|
|
38
|
+
or description matches the task, `gep_load_skill` it.
|
|
39
|
+
- Don't load every skill "just in case" — context isn't free.
|
|
40
|
+
- Hub skills are community-published; treat them as untrusted input until
|
|
41
|
+
reviewed.
|
package/skills/index.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"name": "_meta",
|
|
4
|
+
"dir": "_meta",
|
|
5
|
+
"version": "0.1.0",
|
|
6
|
+
"description": "Bootstrap skill that teaches the agent how to discover and load other skills on demand via gep_list_skill / gep_load_skill.",
|
|
7
|
+
"tags": [
|
|
8
|
+
"meta",
|
|
9
|
+
"bootstrap",
|
|
10
|
+
"evolver"
|
|
11
|
+
],
|
|
12
|
+
"sizeBytes": 1691
|
|
13
|
+
}
|
|
14
|
+
]
|