@claude-flow/cli 3.6.19 → 3.6.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -108,6 +108,47 @@ function deduplicateById(entries) {
108
108
  return Array.from(seen.values());
109
109
  }
110
110
 
111
+ // ADR-095 G6 — content-hash dedup. The April audit measured 5,706 entries
112
+ // in the auto-memory store with only ~20 unique by content; 5,686 dupes
113
+ // were the same MEMORY.md sections imported from sibling project dirs
114
+ // with different IDs. deduplicateById can't catch these (the IDs really
115
+ // are different); we need a content fingerprint.
116
+ //
117
+ // Fast non-cryptographic fingerprint — two 32-bit FNV-1a-style hashes plus the
118
+ // normalized length (not a true 64-bit FNV-1a); collisions remain vanishingly rare for human prose at the scale of an auto-memory store.
119
+ // Whitespace- and case-normalized so trivially-different formatting doesn't bypass dedup.
120
/**
 * Compute a cheap, non-cryptographic fingerprint of a piece of text.
 *
 * The text is normalized first (whitespace runs collapsed to one space,
 * trimmed, lower-cased), so inputs that differ only in spacing or letter
 * case produce the same fingerprint.
 *
 * The fingerprint is NOT a true 64-bit FNV-1a. It is two independent 32-bit
 * FNV-1a-style hashes over the UTF-16 code units of the normalized text,
 * joined with the normalized length: `<h1 hex>_<h2 hex>_<length>`.
 *
 * @param {*} text - Text to fingerprint.
 * @returns {string} The fingerprint, or the sentinel `'0'` when `text` is not
 *   a string or is empty/whitespace-only after normalization.
 */
function fingerprintContent(text) {
  if (typeof text !== 'string') return '0';

  const norm = text.replace(/\s+/g, ' ').trim().toLowerCase();
  // Whitespace-only input carries no content; report it exactly like ''.
  if (norm.length === 0) return '0';

  // Lane 1: standard 32-bit FNV-1a offset basis and prime.
  let h1 = 0x811c9dc5;
  // Lane 2: seeded with the high word of the 64-bit FNV offset basis and
  // multiplied by 0x1b3 (the low 32 bits of the 64-bit FNV prime).
  let h2 = 0xcbf29ce4;

  for (let i = 0; i < norm.length; i++) {
    const c = norm.charCodeAt(i);
    // Math.imul keeps the multiply in 32-bit integer space; >>> 0 makes the
    // result unsigned so toString(16) never emits a minus sign.
    h1 = Math.imul(h1 ^ c, 0x01000193) >>> 0;
    h2 = Math.imul(h2 ^ c, 0x000001b3) >>> 0;
  }

  return `${h1.toString(16)}_${h2.toString(16)}_${norm.length}`;
}
132
+
133
/**
 * Drop entries whose content duplicates an earlier entry's content.
 *
 * An entry's content is its first truthy field among `content`, `summary`
 * and `value`; non-string content is serialized with JSON.stringify before
 * fingerprinting. When two entries share a fingerprint, the one with the
 * strictly higher `accessCount` survives; on a tie the first-seen entry is
 * kept. The survivor occupies the position of the first occurrence, so the
 * relative order of the output follows the input.
 *
 * Entries with nothing to fingerprint (null / non-object items, or missing,
 * empty or whitespace-only content) are never merged: they are passed
 * through unchanged instead of all collapsing into one "empty" bucket.
 *
 * @param {Array<object>} entries - Store entries; a non-array value is
 *   returned unchanged.
 * @param {function(string): string} [fingerprint=fingerprintContent] -
 *   Maps content text to a dedup key.
 * @returns {Array<object>} A new array with content duplicates removed (or
 *   `entries` itself when it is not an array).
 */
function deduplicateByContent(entries, fingerprint = fingerprintContent) {
  if (!Array.isArray(entries)) return entries;

  const kept = [];
  // fingerprint -> index in `kept` of the current survivor for that content
  const slotByFingerprint = new Map();

  for (const entry of entries) {
    // Reading fields off null/primitives would throw or be meaningless.
    if (entry === null || typeof entry !== 'object') {
      kept.push(entry);
      continue;
    }

    const content = entry.content || entry.summary || entry.value || '';
    const text = typeof content === 'string' ? content : JSON.stringify(content);

    // JSON.stringify can return undefined (e.g. for functions); treat that
    // and blank text as "no content" and never dedupe on it.
    if (typeof text !== 'string' || text.trim().length === 0) {
      kept.push(entry);
      continue;
    }

    const fp = fingerprint(text);
    const slot = slotByFingerprint.get(fp);
    if (slot === undefined) {
      slotByFingerprint.set(fp, kept.length);
      kept.push(entry);
      continue;
    }

    // Duplicate content: prefer the more-accessed entry, else keep first-seen.
    const existingAccess = kept[slot].accessCount || 0;
    const candidateAccess = entry.accessCount || 0;
    if (candidateAccess > existingAccess) kept[slot] = entry;
  }

  return kept;
}
151
+
111
152
  // ── Session state helpers ────────────────────────────────────────────────────
112
153
 
113
154
  function sessionGet(key) {
@@ -217,14 +258,24 @@ function buildEdges(entries) {
217
258
  }
218
259
  }
219
260
 
220
- // Similarity edges within categories (Jaccard > 0.3)
261
+ // Similarity edges within categories (Jaccard > 0.3).
262
+ // ADR-095 G6 perf: hoist the trigram computation outside the inner
263
+ // loop. Previously we re-tokenized + re-trigrammed group[j] for every
264
+ // i — O(n²) extra work for nothing. Now compute once per entry.
221
265
  for (const cat of Object.keys(byCategory)) {
222
266
  const group = byCategory[cat];
267
+ if (group.length < 2) continue;
268
+
269
+ // Cache trigram sets for every entry in the group.
270
+ const triCache = new Array(group.length);
271
+ for (let i = 0; i < group.length; i++) {
272
+ triCache[i] = trigrams(tokenize(group[i].content || group[i].summary || ''));
273
+ }
274
+
223
275
  for (let i = 0; i < group.length; i++) {
224
- const triA = trigrams(tokenize(group[i].content || group[i].summary || ''));
276
+ const triA = triCache[i];
225
277
  for (let j = i + 1; j < group.length; j++) {
226
- const triB = trigrams(tokenize(group[j].content || group[j].summary || ''));
227
- const sim = jaccardSimilarity(triA, triB);
278
+ const sim = jaccardSimilarity(triA, triCache[j]);
228
279
  if (sim > 0.3) {
229
280
  edges.push({
230
281
  sourceId: group[i].id,
@@ -340,9 +391,21 @@ function init() {
340
391
  }
341
392
 
342
393
  // Deduplicate store entries by ID (fixes #1518 — 194MB → ~79KB)
343
- const deduped = deduplicateById(store);
394
+ let deduped = deduplicateById(store);
395
+ // ADR-095 G6: also dedupe by content fingerprint. The April audit
396
+ // measured 5,706 entries with only ~20 unique by content because the
397
+ // same MEMORY.md sections get imported from sibling project dirs with
398
+ // different IDs. deduplicateById can't catch that; deduplicateByContent
399
+ // can. Cuts the graph from O(n²) over near-identical duplicates down
400
+ // to O(unique²), which is the difference between a 100MB graph-state
401
+ // and a kilobytes-scale one for typical workloads.
402
+ const beforeContentDedup = deduped.length;
403
+ deduped = deduplicateByContent(deduped);
344
404
  if (deduped.length < store.length) {
345
- process.stderr.write(`[INTELLIGENCE] Deduped store: ${store.length} -> ${deduped.length} entries\n`);
405
+ process.stderr.write(
406
+ `[INTELLIGENCE] Deduped store: ${store.length} -> ${deduped.length} entries ` +
407
+ `(by-id: ${store.length - beforeContentDedup} dropped, by-content: ${beforeContentDedup - deduped.length} dropped)\n`
408
+ );
346
409
  writeJSON(STORE_PATH, deduped);
347
410
  }
348
411
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@claude-flow/cli",
3
- "version": "3.6.19",
3
+ "version": "3.6.20",
4
4
  "type": "module",
5
5
  "description": "Ruflo CLI - Enterprise AI agent orchestration with 60+ specialized agents, swarm coordination, MCP server, self-learning hooks, and vector memory for Claude Code",
6
6
  "main": "dist/src/index.js",