@evomap/evolver 1.29.8 → 1.30.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,10 @@
6
6
  // Two-phase search-then-fetch to minimize credit cost:
7
7
  // Phase 1: POST /a2a/fetch with signals + search_only=true (free, metadata only)
8
8
  // Phase 2: POST /a2a/fetch with asset_ids=[selected] (pays for 1 asset only)
9
+ //
10
+ // Caching layers:
11
+ // 1. Search cache: signal fingerprint -> Phase 1 results (avoids repeat searches)
12
+ // 2. Payload cache: asset_id -> full payload (avoids repeat Phase 2 fetches)
9
13
 
10
14
  const { getNodeId, buildFetch, getHubNodeSecret } = require('./a2aProtocol');
11
15
  const { logAssetCall } = require('./assetCallLog');
@@ -13,7 +17,57 @@ const { logAssetCall } = require('./assetCallLog');
13
17
  const DEFAULT_MIN_REUSE_SCORE = 0.72;
14
18
  const DEFAULT_REUSE_MODE = 'reference'; // 'direct' | 'reference'
15
19
  const MAX_STREAK_CAP = 5;
16
- const TIMEOUT_REASON = 'hub_search_timeout';
20
+
21
+ const SEARCH_CACHE_TTL_MS = 5 * 60 * 1000;
22
+ const SEARCH_CACHE_MAX = 200;
23
+ const PAYLOAD_CACHE_MAX = 100;
24
+ const MIN_PHASE2_MS = 500;
25
+
26
+ // --- In-memory caches (per-process lifetime, bounded) ---
27
+
28
+ const _searchCache = new Map(); // cacheKey -> { ts, value: results[] }
29
+ const _payloadCache = new Map(); // asset_id -> full payload object
30
+
31
+ function _cacheKey(signals) {
32
+ return signals.slice().sort().join('|');
33
+ }
34
+
35
+ function _getSearchCache(key) {
36
+ const entry = _searchCache.get(key);
37
+ if (!entry) return null;
38
+ if (Date.now() - entry.ts > SEARCH_CACHE_TTL_MS) {
39
+ _searchCache.delete(key);
40
+ return null;
41
+ }
42
+ return entry.value;
43
+ }
44
+
45
+ function _setSearchCache(key, value) {
46
+ if (_searchCache.size >= SEARCH_CACHE_MAX) {
47
+ const oldest = _searchCache.keys().next().value;
48
+ _searchCache.delete(oldest);
49
+ }
50
+ _searchCache.set(key, { ts: Date.now(), value });
51
+ }
52
+
53
+ function _getPayloadCache(assetId) {
54
+ return _payloadCache.get(assetId) || null;
55
+ }
56
+
57
+ function _setPayloadCache(assetId, payload) {
58
+ if (_payloadCache.size >= PAYLOAD_CACHE_MAX) {
59
+ const oldest = _payloadCache.keys().next().value;
60
+ _payloadCache.delete(oldest);
61
+ }
62
+ _payloadCache.set(assetId, payload);
63
+ }
64
+
65
+ function clearCaches() {
66
+ _searchCache.clear();
67
+ _payloadCache.clear();
68
+ }
69
+
70
+ // --- Config helpers ---
17
71
 
18
72
  function getHubUrl() {
19
73
  return (process.env.A2A_HUB_URL || '').replace(/\/+$/, '');
@@ -29,6 +83,18 @@ function getMinReuseScore() {
29
83
  return Number.isFinite(n) && n > 0 ? n : DEFAULT_MIN_REUSE_SCORE;
30
84
  }
31
85
 
86
+ function _buildHeaders() {
87
+ const headers = { 'Content-Type': 'application/json', 'Accept': 'application/json' };
88
+ const secret = getHubNodeSecret();
89
+ if (secret) {
90
+ headers['Authorization'] = 'Bearer ' + secret;
91
+ } else {
92
+ const token = process.env.A2A_HUB_TOKEN;
93
+ if (token) headers['Authorization'] = `Bearer ${token}`;
94
+ }
95
+ return headers;
96
+ }
97
+
32
98
  /**
33
99
  * Score a hub asset for local reuse quality.
34
100
  * rank = confidence * min(max(success_streak, 1), MAX_STREAK_CAP) * (reputation / 100)
@@ -77,7 +143,14 @@ function pickBestMatch(results, threshold) {
77
143
  * Phase 1: search_only=true -> get candidate metadata (free, no credit cost)
78
144
  * Phase 2: asset_ids=[best_match] -> fetch full payload for the selected asset only
79
145
  *
80
- * Falls back to single-call fetch (old behavior) if search_only is not supported.
146
+ * Caching:
147
+ * - Phase 1 results are cached by signal fingerprint for 5 minutes.
148
+ * - Phase 2 payloads are cached by asset_id indefinitely (bounded, FIFO eviction).
149
+ * - Both caches reduce Hub load and eliminate redundant network round-trips.
150
+ *
151
+ * Timeout: a single deadline spans both phases; Phase 2 is skipped if insufficient
152
+ * time remains (< 500ms).
153
+ *
81
154
  * Returns { hit: true, match, score, mode } or { hit: false }.
82
155
  */
83
156
  async function hubSearch(signals, opts) {
@@ -90,56 +163,53 @@ async function hubSearch(signals, opts) {
90
163
  if (signalList.length === 0) return { hit: false, reason: 'no_signals' };
91
164
 
92
165
  const threshold = (opts && Number.isFinite(opts.threshold)) ? opts.threshold : getMinReuseScore();
93
- const timeout = (opts && Number.isFinite(opts.timeoutMs)) ? opts.timeoutMs : 8000;
166
+ const timeoutMs = (opts && Number.isFinite(opts.timeoutMs)) ? opts.timeoutMs : 8000;
167
+ const deadline = Date.now() + timeoutMs;
168
+ const runId = (opts && opts.run_id) || null;
94
169
 
95
170
  try {
96
- // Phase 1: search_only to get candidate metadata (free)
97
- const searchMsg = buildFetch({ signals: signalList, searchOnly: true });
98
171
  const endpoint = hubUrl + '/a2a/fetch';
172
+ const headers = _buildHeaders();
173
+ const cacheKey = _cacheKey(signalList);
99
174
 
100
- const controller = new AbortController();
101
- const timer = setTimeout(() => controller.abort(TIMEOUT_REASON), timeout);
175
+ // --- Phase 1: search_only (free) ---
102
176
 
103
- const headers = { 'Content-Type': 'application/json', 'Accept': 'application/json' };
104
- const secret = getHubNodeSecret();
105
- if (secret) {
106
- headers['Authorization'] = 'Bearer ' + secret;
107
- } else {
108
- const token = process.env.A2A_HUB_TOKEN;
109
- if (token) headers['Authorization'] = `Bearer ${token}`;
110
- }
177
+ let results = _getSearchCache(cacheKey);
178
+ let cacheHit = !!results;
111
179
 
112
- const res = await fetch(endpoint, {
113
- method: 'POST',
114
- headers,
115
- body: JSON.stringify(searchMsg),
116
- signal: controller.signal,
117
- });
118
- clearTimeout(timer);
180
+ if (!results) {
181
+ const searchMsg = buildFetch({ signals: signalList, searchOnly: true });
182
+ const controller = new AbortController();
183
+ const timer = setTimeout(() => controller.abort(), deadline - Date.now());
119
184
 
120
- if (!res.ok) {
121
- logAssetCall({
122
- run_id: (opts && opts.run_id) || null,
123
- action: 'hub_search_miss',
124
- signals: signalList,
125
- reason: `hub_http_${res.status}`,
126
- via: 'search_then_fetch',
185
+ const res = await fetch(endpoint, {
186
+ method: 'POST',
187
+ headers,
188
+ body: JSON.stringify(searchMsg),
189
+ signal: controller.signal,
127
190
  });
128
- return { hit: false, reason: `hub_http_${res.status}` };
129
- }
191
+ clearTimeout(timer);
192
+
193
+ if (!res.ok) {
194
+ logAssetCall({
195
+ run_id: runId, action: 'hub_search_miss', signals: signalList,
196
+ reason: `hub_http_${res.status}`, via: 'search_then_fetch',
197
+ });
198
+ return { hit: false, reason: `hub_http_${res.status}` };
199
+ }
130
200
 
131
- const data = await res.json();
132
- const results = (data && data.payload && Array.isArray(data.payload.results))
133
- ? data.payload.results
134
- : [];
201
+ const data = await res.json();
202
+ results = (data && data.payload && Array.isArray(data.payload.results))
203
+ ? data.payload.results
204
+ : [];
205
+
206
+ _setSearchCache(cacheKey, results);
207
+ }
135
208
 
136
209
  if (results.length === 0) {
137
210
  logAssetCall({
138
- run_id: (opts && opts.run_id) || null,
139
- action: 'hub_search_miss',
140
- signals: signalList,
141
- reason: 'no_results',
142
- via: 'search_then_fetch',
211
+ run_id: runId, action: 'hub_search_miss', signals: signalList,
212
+ reason: 'no_results', via: 'search_then_fetch',
143
213
  });
144
214
  return { hit: false, reason: 'no_results' };
145
215
  }
@@ -147,9 +217,7 @@ async function hubSearch(signals, opts) {
147
217
  const pick = pickBestMatch(results, threshold);
148
218
  if (!pick) {
149
219
  logAssetCall({
150
- run_id: (opts && opts.run_id) || null,
151
- action: 'hub_search_miss',
152
- signals: signalList,
220
+ run_id: runId, action: 'hub_search_miss', signals: signalList,
153
221
  reason: 'below_threshold',
154
222
  extra: { candidates: results.length, threshold },
155
223
  via: 'search_then_fetch',
@@ -157,40 +225,52 @@ async function hubSearch(signals, opts) {
157
225
  return { hit: false, reason: 'below_threshold', candidates: results.length };
158
226
  }
159
227
 
160
- // Phase 2: fetch full payload for the selected asset only (pays for 1 asset)
228
+ // --- Phase 2: fetch full payload (paid, but free if already purchased) ---
229
+
161
230
  const selectedAssetId = pick.match.asset_id;
162
231
  if (selectedAssetId) {
163
- try {
164
- const fetchMsg = buildFetch({ assetIds: [selectedAssetId] });
165
- const controller2 = new AbortController();
166
- const timer2 = setTimeout(() => controller2.abort(TIMEOUT_REASON), timeout);
167
-
168
- const res2 = await fetch(endpoint, {
169
- method: 'POST',
170
- headers,
171
- body: JSON.stringify(fetchMsg),
172
- signal: controller2.signal,
173
- });
174
- clearTimeout(timer2);
175
-
176
- if (res2.ok) {
177
- const data2 = await res2.json();
178
- const fullResults = (data2 && data2.payload && Array.isArray(data2.payload.results))
179
- ? data2.payload.results
180
- : [];
181
- if (fullResults.length > 0) {
182
- pick.match = { ...pick.match, ...fullResults[0] };
232
+ const cachedPayload = _getPayloadCache(selectedAssetId);
233
+ if (cachedPayload) {
234
+ pick.match = { ...pick.match, ...cachedPayload };
235
+ } else {
236
+ const remaining = deadline - Date.now();
237
+ if (remaining > MIN_PHASE2_MS) {
238
+ try {
239
+ const fetchMsg = buildFetch({ assetIds: [selectedAssetId] });
240
+ const controller2 = new AbortController();
241
+ const timer2 = setTimeout(() => controller2.abort(), remaining);
242
+
243
+ const res2 = await fetch(endpoint, {
244
+ method: 'POST',
245
+ headers,
246
+ body: JSON.stringify(fetchMsg),
247
+ signal: controller2.signal,
248
+ });
249
+ clearTimeout(timer2);
250
+
251
+ if (res2.ok) {
252
+ const data2 = await res2.json();
253
+ const fullResults = (data2 && data2.payload && Array.isArray(data2.payload.results))
254
+ ? data2.payload.results
255
+ : [];
256
+ if (fullResults.length > 0) {
257
+ _setPayloadCache(selectedAssetId, fullResults[0]);
258
+ pick.match = { ...pick.match, ...fullResults[0] };
259
+ }
260
+ }
261
+ } catch (fetchErr) {
262
+ console.log(`[HubSearch] Phase 2 fetch failed (non-fatal): ${fetchErr.message}`);
183
263
  }
264
+ } else {
265
+ console.log(`[HubSearch] Phase 2 skipped: ${remaining}ms remaining < ${MIN_PHASE2_MS}ms threshold`);
184
266
  }
185
- } catch (fetchErr) {
186
- console.log(`[HubSearch] Phase 2 fetch failed (non-fatal): ${fetchErr.message}`);
187
267
  }
188
268
  }
189
269
 
190
- console.log(`[HubSearch] Hit via search+fetch: ${pick.match.asset_id || 'unknown'} (score=${pick.score}, mode=${pick.mode})`);
270
+ console.log(`[HubSearch] Hit via search+fetch: ${pick.match.asset_id || 'unknown'} (score=${pick.score}, mode=${pick.mode}${cacheHit ? ', search_cached' : ''})`);
191
271
 
192
272
  logAssetCall({
193
- run_id: (opts && opts.run_id) || null,
273
+ run_id: runId,
194
274
  action: 'hub_search_hit',
195
275
  asset_id: pick.match.asset_id || null,
196
276
  asset_type: pick.match.asset_type || pick.match.type || null,
@@ -199,7 +279,7 @@ async function hubSearch(signals, opts) {
199
279
  score: pick.score,
200
280
  mode: pick.mode,
201
281
  signals: signalList,
202
- via: 'search_then_fetch',
282
+ via: cacheHit ? 'search_cached' : 'search_then_fetch',
203
283
  });
204
284
 
205
285
  return {
@@ -212,11 +292,10 @@ async function hubSearch(signals, opts) {
212
292
  chain_id: pick.match.chain_id || null,
213
293
  };
214
294
  } catch (err) {
215
- const isTimeout = err.name === 'AbortError' || (err.cause && err.cause === TIMEOUT_REASON);
216
- const reason = isTimeout ? 'timeout' : 'fetch_error';
295
+ const reason = err.name === 'AbortError' ? 'timeout' : 'fetch_error';
217
296
  console.log(`[HubSearch] Failed (non-fatal, ${reason}): ${err.message}`);
218
297
  logAssetCall({
219
- run_id: (opts && opts.run_id) || null,
298
+ run_id: runId,
220
299
  action: 'hub_search_miss',
221
300
  signals: signalList,
222
301
  reason,
@@ -234,4 +313,5 @@ module.exports = {
234
313
  getReuseMode,
235
314
  getMinReuseScore,
236
315
  getHubUrl,
316
+ clearCaches,
237
317
  };
@@ -79,6 +79,10 @@ function selectGene(genes, signals, opts) {
79
79
  const driftEnabled = !!(opts && opts.driftEnabled);
80
80
  const preferredGeneId = opts && typeof opts.preferredGeneId === 'string' ? opts.preferredGeneId : null;
81
81
 
82
+ // Diversity-directed drift: capability_gaps from Hub heartbeat
83
+ var capabilityGaps = opts && Array.isArray(opts.capabilityGaps) ? opts.capabilityGaps : [];
84
+ var noveltyScore = opts && Number.isFinite(Number(opts.noveltyScore)) ? Number(opts.noveltyScore) : null;
85
+
82
86
  // Compute continuous drift intensity based on effective population size
83
87
  var driftIntensity = computeDriftIntensity({
84
88
  driftEnabled: driftEnabled,
@@ -99,7 +103,7 @@ function selectGene(genes, signals, opts) {
99
103
  .filter(x => x.score > 0)
100
104
  .sort((a, b) => b.score - a.score);
101
105
 
102
- if (scored.length === 0) return { selected: null, alternatives: [], driftIntensity: driftIntensity };
106
+ if (scored.length === 0) return { selected: null, alternatives: [], driftIntensity: driftIntensity, driftMode: 'none' };
103
107
 
104
108
  // Memory graph preference: only override when the preferred gene is already a match candidate.
105
109
  if (preferredGeneId) {
@@ -111,27 +115,68 @@ function selectGene(genes, signals, opts) {
111
115
  selected: preferred.gene,
112
116
  alternatives: filteredRest.slice(0, 4).map(x => x.gene),
113
117
  driftIntensity: driftIntensity,
118
+ driftMode: 'memory_preferred',
114
119
  };
115
120
  }
116
121
  }
117
122
 
118
123
  // Low-efficiency suppression: do not repeat low-confidence paths unless drift is active.
119
124
  const filtered = useDrift ? scored : scored.filter(x => x.gene && !bannedGeneIds.has(x.gene.id));
120
- if (filtered.length === 0) return { selected: null, alternatives: scored.slice(0, 4).map(x => x.gene), driftIntensity: driftIntensity };
125
+ if (filtered.length === 0) return { selected: null, alternatives: scored.slice(0, 4).map(x => x.gene), driftIntensity: driftIntensity, driftMode: 'none' };
121
126
 
122
- // Stochastic selection under drift: with probability proportional to driftIntensity,
123
- // pick a random gene from the top candidates instead of always picking the best.
127
+ // Diversity-directed drift: when capability gaps are available, prefer genes that
128
+ // cover gap areas instead of pure random selection. This replaces the blind
129
+ // random drift with an informed exploration toward under-covered capabilities.
124
130
  var selectedIdx = 0;
131
+ var driftMode = 'selection';
125
132
  if (driftIntensity > 0 && filtered.length > 1 && Math.random() < driftIntensity) {
126
- // Weighted random selection from top candidates (favor higher-scoring but allow lower)
127
- var topN = Math.min(filtered.length, Math.max(2, Math.ceil(filtered.length * driftIntensity)));
128
- selectedIdx = Math.floor(Math.random() * topN);
133
+ if (capabilityGaps.length > 0) {
134
+ // Directed drift: score each candidate by how well its signals_match
135
+ // covers the capability gap dimensions
136
+ var gapScores = filtered.map(function(entry, idx) {
137
+ var g = entry.gene;
138
+ var patterns = Array.isArray(g.signals_match) ? g.signals_match : [];
139
+ var gapHits = 0;
140
+ for (var gi = 0; gi < capabilityGaps.length && gi < 5; gi++) {
141
+ var gapSignal = capabilityGaps[gi];
142
+ if (typeof gapSignal === 'string' && patterns.some(function(p) { return matchPatternToSignals(p, [gapSignal]); })) {
143
+ gapHits++;
144
+ }
145
+ }
146
+ return { idx: idx, gapHits: gapHits, baseScore: entry.score };
147
+ });
148
+
149
+ var hasGapHits = gapScores.some(function(gs) { return gs.gapHits > 0; });
150
+ if (hasGapHits) {
151
+ // Sort by gap coverage first, then by base score
152
+ gapScores.sort(function(a, b) {
153
+ return b.gapHits - a.gapHits || b.baseScore - a.baseScore;
154
+ });
155
+ selectedIdx = gapScores[0].idx;
156
+ driftMode = 'diversity_directed';
157
+ } else {
158
+ // No gap match: fall back to novelty-weighted random selection
159
+ var topN = Math.min(filtered.length, Math.max(2, Math.ceil(filtered.length * driftIntensity)));
160
+ // If novelty score is low (agent is too similar to others), increase exploration range
161
+ if (noveltyScore != null && noveltyScore < 0.3 && topN < filtered.length) {
162
+ topN = Math.min(filtered.length, topN + 1);
163
+ }
164
+ selectedIdx = Math.floor(Math.random() * topN);
165
+ driftMode = 'random_weighted';
166
+ }
167
+ } else {
168
+ // No capability gap data: original random drift behavior
169
+ var topN = Math.min(filtered.length, Math.max(2, Math.ceil(filtered.length * driftIntensity)));
170
+ selectedIdx = Math.floor(Math.random() * topN);
171
+ driftMode = 'random';
172
+ }
129
173
  }
130
174
 
131
175
  return {
132
176
  selected: filtered[selectedIdx].gene,
133
177
  alternatives: filtered.filter(function(_, i) { return i !== selectedIdx; }).slice(0, 4).map(x => x.gene),
134
178
  driftIntensity: driftIntensity,
179
+ driftMode: driftMode,
135
180
  };
136
181
  }
137
182
 
@@ -182,7 +227,7 @@ function banGenesFromFailedCapsules(failedCapsules, signals, existingBans) {
182
227
  return bans;
183
228
  }
184
229
 
185
- function selectGeneAndCapsule({ genes, capsules, signals, memoryAdvice, driftEnabled, failedCapsules }) {
230
+ function selectGeneAndCapsule({ genes, capsules, signals, memoryAdvice, driftEnabled, failedCapsules, capabilityGaps, noveltyScore }) {
186
231
  const bannedGeneIds =
187
232
  memoryAdvice && memoryAdvice.bannedGeneIds instanceof Set ? memoryAdvice.bannedGeneIds : new Set();
188
233
  const preferredGeneId = memoryAdvice && memoryAdvice.preferredGeneId ? memoryAdvice.preferredGeneId : null;
@@ -197,6 +242,8 @@ function selectGeneAndCapsule({ genes, capsules, signals, memoryAdvice, driftEna
197
242
  bannedGeneIds: effectiveBans,
198
243
  preferredGeneId,
199
244
  driftEnabled: !!driftEnabled,
245
+ capabilityGaps: Array.isArray(capabilityGaps) ? capabilityGaps : [],
246
+ noveltyScore: Number.isFinite(Number(noveltyScore)) ? Number(noveltyScore) : null,
200
247
  });
201
248
  const capsule = selectCapsule(capsules, signals);
202
249
  const selector = buildSelectorDecision({
@@ -223,32 +223,78 @@ function buildDistillationPrompt(analysis, existingGenes, sampleCapsules) {
223
223
  });
224
224
 
225
225
  return [
226
- 'You are a Gene synthesis engine for the GEP (Gene Expression Protocol).',
226
+ 'You are a Gene synthesis engine for the GEP (Genome Evolution Protocol).',
227
+ 'Your job is to distill successful evolution capsules into a high-quality, reusable Gene',
228
+ 'that other AI agents can discover, fetch, and execute.',
227
229
  '',
228
- 'Analyze the following successful evolution capsules and extract a reusable Gene.',
230
+ '## OUTPUT FORMAT',
231
+ '',
232
+ 'Output ONLY a single valid JSON object (no markdown fences, no explanation).',
233
+ '',
234
+ '## GENE ID RULES (CRITICAL)',
235
+ '',
236
+ '- The id MUST start with "' + DISTILLED_ID_PREFIX + '" followed by a descriptive kebab-case name.',
237
+ '- The suffix MUST describe the core capability in 3-6 hyphen-separated words.',
238
+ '- NEVER include timestamps, numeric IDs, random numbers, tool names (cursor, vscode, etc.), or UUIDs.',
239
+ '- Good: "gene_distilled_retry-with-exponential-backoff", "gene_distilled_database-migration-rollback"',
240
+ '- Bad: "gene_distilled_cursor-1773331925711", "gene_distilled_1234567890", "gene_distilled_fix-1"',
241
+ '',
242
+ '## SUMMARY RULES',
243
+ '',
244
+ '- The "summary" MUST be a clear, human-readable sentence (30-200 chars) describing',
245
+ ' WHAT capability this Gene provides and WHY it is useful.',
246
+ '- Write as if for a marketplace listing -- the summary is the first thing other agents see.',
247
+ '- Good: "Retry failed HTTP requests with exponential backoff, jitter, and circuit breaker to prevent cascade failures"',
248
+ '- Bad: "Distilled from capsules", "AI agent skill", "cursor automation", "1773331925711"',
249
+ '- NEVER include timestamps, build numbers, or tool names in the summary.',
250
+ '',
251
+ '## SIGNALS_MATCH RULES',
252
+ '',
253
+ '- Each signal MUST be a generic, reusable keyword that describes WHEN to trigger this Gene.',
254
+ '- Use lowercase_snake_case. Signals should be domain terms, not implementation artifacts.',
255
+ '- NEVER include timestamps, build numbers, tool names, session IDs, or random suffixes.',
256
+ '- Include 3-7 signals covering both the problem domain and the solution approach.',
257
+ '- Good: ["http_retry", "request_timeout", "exponential_backoff", "circuit_breaker", "resilience"]',
258
+ '- Bad: ["cursor_auto_1773331925711", "cli_headless_1773331925711", "bypass_123"]',
259
+ '',
260
+ '## STRATEGY RULES',
261
+ '',
262
+ '- Strategy steps MUST be actionable, concrete instructions an AI agent can execute.',
263
+ '- Each step should be a clear imperative sentence starting with a verb.',
264
+ '- Include 5-10 steps. Each step should be self-contained and specific.',
265
+ '- Do NOT describe what happened; describe what TO DO.',
266
+ '- Include rationale or context in parentheses when non-obvious.',
267
+ '- Where applicable, include inline code examples using backtick notation.',
268
+ '- Good: "Wrap the HTTP call in a retry loop with `maxRetries=3` and initial delay of 500ms"',
269
+ '- Bad: "Handle retries", "Fix the issue", "Improve reliability"',
270
+ '',
271
+ '## PRECONDITIONS RULES',
272
+ '',
273
+ '- List concrete, verifiable conditions that must be true before applying this Gene.',
274
+ '- Each precondition should be a testable statement, not a vague requirement.',
275
+ '- Good: "Project uses Node.js >= 18 with ES module support"',
276
+ '- Bad: "need to fix something"',
277
+ '',
278
+ '## CONSTRAINTS',
229
279
  '',
230
- 'RULES:',
231
- '- Strategy steps MUST be actionable operations, NOT summaries',
232
- '- Each step must be a concrete instruction an AI agent can execute',
233
- '- Do NOT describe what happened; describe what TO DO next time',
234
- '- The Gene MUST have a unique id starting with "' + DISTILLED_ID_PREFIX + '"',
235
280
  '- constraints.max_files MUST be <= ' + DISTILLED_MAX_FILES,
236
281
  '- constraints.forbidden_paths MUST include at least [".git", "node_modules"]',
237
- '- Output valid Gene JSON only (no markdown, no explanation)',
238
282
  '',
239
- 'GENE ID NAMING RULES (CRITICAL):',
240
- '- The id suffix (after "' + DISTILLED_ID_PREFIX + '") MUST be a descriptive kebab-case name',
241
- ' derived from the strategy content or signals_match (e.g., "retry-on-timeout", "log-rotation-cleanup")',
242
- '- NEVER use timestamps, random numbers, tool names (cursor, vscode, etc.), or UUIDs in the id',
243
- '- Good: "gene_distilled_retry-on-timeout", "gene_distilled_cache-invalidation-strategy"',
244
- '- Bad: "gene_distilled_cursor-1773331925711", "gene_distilled_1234567890", "gene_distilled_fix-1"',
245
- '- The id suffix must be 3+ words separated by hyphens, describing the core capability',
283
+ '## VALIDATION',
284
+ '',
285
+ '- Validation commands MUST start with "node ", "npm ", or "npx " (security constraint).',
286
+ '- Include commands that actually verify the Gene was applied correctly.',
287
+ '- Good: "npx tsc --noEmit", "npm test"',
288
+ '- Bad: "node -v" (proves nothing about the Gene)',
289
+ '',
290
+ '## QUALITY BAR',
246
291
  '',
247
- 'SUMMARY RULES:',
248
- '- The "summary" field MUST be a clear, human-readable description (10-200 chars)',
249
- '- It should describe WHAT the Gene does, not implementation details',
250
- '- Good: "Retry failed HTTP requests with exponential backoff and circuit breaker"',
251
- '- Bad: "Distilled from capsules", "AI agent skill", "cursor automation"',
292
+ 'Imagine this Gene will be published on a marketplace for thousands of AI agents.',
293
+ 'It should be as professional and useful as a well-written library README.',
294
+ 'Ask yourself: "Would another agent find this Gene by searching for the signals?',
295
+ 'Would the summary make them want to fetch it? Would the strategy be enough to execute?"',
296
+ '',
297
+ '---',
252
298
  '',
253
299
  'SUCCESSFUL CAPSULES (grouped by pattern):',
254
300
  JSON.stringify(samples, null, 2),
@@ -260,7 +306,7 @@ function buildDistillationPrompt(analysis, existingGenes, sampleCapsules) {
260
306
  JSON.stringify(analysis, null, 2),
261
307
  '',
262
308
  'Output a single Gene JSON object with these fields:',
263
- '{ "type": "Gene", "id": "gene_distilled_<descriptive-kebab-name>", "summary": "<clear human-readable description>", "category": "...", "signals_match": [...], "preconditions": [...], "strategy": [...], "constraints": { "max_files": N, "forbidden_paths": [...] }, "validation": [...] }',
309
+ '{ "type": "Gene", "id": "gene_distilled_<descriptive-kebab-name>", "summary": "<clear marketplace-quality description>", "category": "repair|optimize|innovate", "signals_match": ["generic_signal_1", ...], "preconditions": ["Concrete condition 1", ...], "strategy": ["Step 1: verb ...", "Step 2: verb ...", ...], "constraints": { "max_files": N, "forbidden_paths": [".git", "node_modules", ...] }, "validation": ["npx tsc --noEmit", ...], "schema_version": "1.6.0" }',
264
310
  ].join('\n');
265
311
  }
266
312
 
@@ -298,6 +344,34 @@ function deriveDescriptiveId(gene) {
298
344
  return DISTILLED_ID_PREFIX + unique.slice(0, 5).join('-');
299
345
  }
300
346
 
347
+ // ---------------------------------------------------------------------------
348
+ // Step 4: sanitizeSignalsMatch -- strip timestamps, random suffixes, tool names
349
+ // ---------------------------------------------------------------------------
350
+ function sanitizeSignalsMatch(signals) {
351
+ if (!Array.isArray(signals)) return [];
352
+ var cleaned = [];
353
+ signals.forEach(function (s) {
354
+ var sig = String(s || '').trim().toLowerCase();
355
+ if (!sig) return;
356
+ // Strip trailing timestamps (10+ digits) and random suffixes
357
+ sig = sig.replace(/[_-]\d{10,}$/g, '');
358
+ // Strip leading/trailing underscores/hyphens left over
359
+ sig = sig.replace(/^[_-]+|[_-]+$/g, '');
360
+ // Reject signals that are purely numeric
361
+ if (/^\d+$/.test(sig)) return;
362
+ // Reject signals that are just a tool name with optional number
363
+ if (/^(cursor|vscode|vim|emacs|windsurf|copilot|cline|codex|bypass|distill)[_-]?\d*$/i.test(sig)) return;
364
+ // Reject signals shorter than 3 chars after cleaning
365
+ if (sig.length < 3) return;
366
+ // Reject signals that still contain long numeric sequences (session IDs, etc.)
367
+ if (/\d{8,}/.test(sig)) return;
368
+ cleaned.push(sig);
369
+ });
370
+ // Deduplicate
371
+ var seen = {};
372
+ return cleaned.filter(function (s) { if (seen[s]) return false; seen[s] = true; return true; });
373
+ }
374
+
301
375
  // ---------------------------------------------------------------------------
302
376
  // Step 4: validateSynthesizedGene
303
377
  // ---------------------------------------------------------------------------
@@ -311,16 +385,34 @@ function validateSynthesizedGene(gene, existingGenes) {
311
385
  if (!Array.isArray(gene.signals_match) || gene.signals_match.length === 0) errors.push('missing or empty signals_match');
312
386
  if (!Array.isArray(gene.strategy) || gene.strategy.length === 0) errors.push('missing or empty strategy');
313
387
 
388
+ // --- Signals sanitization (BEFORE id derivation so deriveDescriptiveId uses clean signals) ---
389
+ if (Array.isArray(gene.signals_match)) {
390
+ gene.signals_match = sanitizeSignalsMatch(gene.signals_match);
391
+ if (gene.signals_match.length === 0) {
392
+ errors.push('signals_match is empty after sanitization (all signals were invalid)');
393
+ }
394
+ }
395
+
396
+ // --- Summary sanitization (BEFORE id derivation so deriveDescriptiveId uses clean summary) ---
397
+ if (gene.summary) {
398
+ gene.summary = gene.summary.replace(/\s*\d{10,}\s*$/g, '').replace(/\.\s*\d{10,}/g, '.').trim();
399
+ }
400
+
401
+ // --- ID sanitization ---
314
402
  if (gene.id && !String(gene.id).startsWith(DISTILLED_ID_PREFIX)) {
315
403
  gene.id = DISTILLED_ID_PREFIX + String(gene.id).replace(/^gene_/, '');
316
404
  }
317
405
 
318
406
  if (gene.id) {
319
407
  var suffix = String(gene.id).replace(DISTILLED_ID_PREFIX, '');
408
+ // Strip ALL embedded timestamps (10+ digit sequences) anywhere in the id
409
+ suffix = suffix.replace(/[-_]?\d{10,}[-_]?/g, '-').replace(/[-_]+/g, '-').replace(/^[-_]+|[-_]+$/g, '');
320
410
  var needsRename = /^\d+$/.test(suffix) || /^\d{10,}/.test(suffix)
321
- || /^(cursor|vscode|vim|emacs|windsurf|copilot|cline|codex)[-_]?\d*/i.test(suffix);
411
+ || /^(cursor|vscode|vim|emacs|windsurf|copilot|cline|codex)[-_]?\d*$/i.test(suffix);
322
412
  if (needsRename) {
323
413
  gene.id = deriveDescriptiveId(gene);
414
+ } else {
415
+ gene.id = DISTILLED_ID_PREFIX + suffix;
324
416
  }
325
417
  var cleanSuffix = String(gene.id).replace(DISTILLED_ID_PREFIX, '');
326
418
  if (cleanSuffix.replace(/[-_]/g, '').length < 6) {
@@ -328,6 +420,7 @@ function validateSynthesizedGene(gene, existingGenes) {
328
420
  }
329
421
  }
330
422
 
423
+ // --- Summary fallback (summary was already sanitized above, this handles missing/short) ---
331
424
  if (!gene.summary || typeof gene.summary !== 'string' || gene.summary.length < 10) {
332
425
  if (Array.isArray(gene.strategy) && gene.strategy.length > 0) {
333
426
  gene.summary = String(gene.strategy[0]).slice(0, 200);
@@ -336,6 +429,12 @@ function validateSynthesizedGene(gene, existingGenes) {
336
429
  }
337
430
  }
338
431
 
432
+ // --- Strategy quality: require minimum 3 steps ---
433
+ if (Array.isArray(gene.strategy) && gene.strategy.length < 3) {
434
+ errors.push('strategy must have at least 3 steps for a quality skill');
435
+ }
436
+
437
+ // --- Constraints ---
339
438
  if (!gene.constraints || typeof gene.constraints !== 'object') gene.constraints = {};
340
439
  if (!Array.isArray(gene.constraints.forbidden_paths) || gene.constraints.forbidden_paths.length === 0) {
341
440
  gene.constraints.forbidden_paths = ['.git', 'node_modules'];
@@ -347,6 +446,7 @@ function validateSynthesizedGene(gene, existingGenes) {
347
446
  gene.constraints.max_files = DISTILLED_MAX_FILES;
348
447
  }
349
448
 
449
+ // --- Validation command sanitization ---
350
450
  var ALLOWED_PREFIXES = ['node ', 'npm ', 'npx '];
351
451
  if (Array.isArray(gene.validation)) {
352
452
  gene.validation = gene.validation.filter(function (cmd) {
@@ -359,11 +459,16 @@ function validateSynthesizedGene(gene, existingGenes) {
359
459
  });
360
460
  }
361
461
 
462
+ // --- Schema version ---
463
+ if (!gene.schema_version) gene.schema_version = '1.6.0';
464
+
465
+ // --- Duplicate ID check ---
362
466
  var existingIds = new Set((existingGenes || []).map(function (g) { return g.id; }));
363
467
  if (gene.id && existingIds.has(gene.id)) {
364
468
  gene.id = gene.id + '_' + Date.now().toString(36);
365
469
  }
366
470
 
471
+ // --- Signal overlap check ---
367
472
  if (gene.signals_match && existingGenes && existingGenes.length > 0) {
368
473
  var newSet = new Set(gene.signals_match.map(function (s) { return String(s).toLowerCase(); }));
369
474
  for (var i = 0; i < existingGenes.length; i++) {
@@ -566,6 +671,7 @@ module.exports = {
566
671
  prepareDistillation: prepareDistillation,
567
672
  completeDistillation: completeDistillation,
568
673
  validateSynthesizedGene: validateSynthesizedGene,
674
+ sanitizeSignalsMatch: sanitizeSignalsMatch,
569
675
  shouldDistill: shouldDistill,
570
676
  buildDistillationPrompt: buildDistillationPrompt,
571
677
  extractJsonFromLlmResponse: extractJsonFromLlmResponse,