@claude-flow/cli 3.6.19 → 3.6.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -108,6 +108,47 @@ function deduplicateById(entries) {
|
|
|
108
108
|
return Array.from(seen.values());
|
|
109
109
|
}
|
|
110
110
|
|
|
111
|
+
// ADR-095 G6 — content-hash dedup. The April audit measured 5,706 entries
|
|
112
|
+
// in the auto-memory store with only ~20 unique by content; 5,686 dupes
|
|
113
|
+
// were the same MEMORY.md sections imported from sibling project dirs
|
|
114
|
+
// with different IDs. deduplicateById can't catch these (the IDs really
|
|
115
|
+
// are different); we need a content fingerprint.
|
|
116
|
+
//
|
|
117
|
+
// Fast non-cryptographic fingerprint — collisions across two 32-bit FNV-1a-style lanes plus the length are
|
|
118
|
+
// vanishingly rare for human prose at the scale of an auto-memory store.
|
|
119
|
+
// Whitespace-normalized so trivially-different formatting doesn't bypass dedup.
|
|
120
|
+
/**
 * Compute a fast, non-cryptographic fingerprint of a piece of text for
 * content-based deduplication.
 *
 * The text is normalized first: runs of whitespace collapse to a single
 * space, leading/trailing whitespace is trimmed, and the result is
 * lower-cased. Inputs that differ only in formatting or letter case
 * therefore share a fingerprint.
 *
 * The fingerprint is NOT a true 64-bit FNV-1a. It is two independent 32-bit
 * FNV-1a-style lanes (different seeds and multipliers) fed with UTF-16 code
 * units, joined with the normalized length:
 * `<lane1 hex>_<lane2 hex>_<length>`.
 *
 * @param {*} text - Value to fingerprint; anything other than a string is
 *   treated as "no content".
 * @returns {string} `'0'` when `text` is not a string or is empty after
 *   normalization, otherwise the fingerprint string described above.
 */
function fingerprintContent(text) {
  if (typeof text !== 'string') return '0';

  const norm = text.replace(/\s+/g, ' ').trim().toLowerCase();
  // Check emptiness AFTER normalization so whitespace-only text maps to the
  // same sentinel as the empty string instead of getting its own hash.
  if (norm.length === 0) return '0';

  // Lane 1: standard 32-bit FNV-1a offset basis and prime.
  const LANE1_PRIME = 0x01000193;
  // Lane 2: seeded with the high word of the 64-bit FNV offset basis. Its
  // multiplier is written out as 0x1b3 because that is what the former
  // expression `0x100000001b3 & 0xffffffff` evaluated to (bitwise operators
  // coerce their operands to 32-bit integers).
  const LANE2_PRIME = 0x1b3;

  let h1 = 0x811c9dc5;
  let h2 = 0xcbf29ce4;
  for (let i = 0; i < norm.length; i++) {
    const c = norm.charCodeAt(i);
    // Math.imul keeps the multiply in 32-bit space; `>>> 0` makes it unsigned.
    h1 = Math.imul(h1 ^ c, LANE1_PRIME) >>> 0;
    h2 = Math.imul(h2 ^ c, LANE2_PRIME) >>> 0;
  }
  return `${h1.toString(16)}_${h2.toString(16)}_${norm.length}`;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Collapse entries whose textual content is identical (per
 * `fingerprintContent`) into a single survivor.
 *
 * The text used for an entry is the first truthy value among `content`,
 * `summary` and `value`; non-string values are serialized with
 * `JSON.stringify`.
 *
 * Survivor selection among duplicates:
 *   1. the entry with the higher `accessCount` (missing counts as 0) wins;
 *   2. on equal `accessCount`, the entry with the earlier `createdAt` wins
 *      when both timestamps are parseable; otherwise the first one seen stays.
 * The survivor occupies the position of the first occurrence, so the relative
 * order of the output follows the input.
 *
 * Entries that cannot be fingerprinted (`null`/non-object items, or entries
 * with no text or whitespace-only text) are passed through untouched. They
 * are never merged with each other, because "no content" is not evidence
 * that two entries are duplicates.
 *
 * @param {Array<object>|*} entries - Store entries; non-array input is
 *   returned unchanged.
 * @returns {Array<object>|*} A new array with content duplicates removed, or
 *   the input itself when it is not an array.
 */
function deduplicateByContent(entries) {
  if (!Array.isArray(entries)) return entries;

  // Best-effort timestamp conversion. NOTE(review): assumes createdAt is an
  // epoch-millisecond number, a Date, or a Date.parse-able string; confirm
  // against the store schema. Anything else yields NaN and skips the tie-break.
  const toMillis = (value) => {
    if (typeof value === 'number') return value;
    if (value instanceof Date) return value.getTime();
    if (typeof value === 'string') return Date.parse(value);
    return Number.NaN;
  };

  const kept = [];
  const slotByFingerprint = new Map();

  for (const entry of entries) {
    // Malformed items are preserved rather than crashing the whole pass.
    if (entry === null || typeof entry !== 'object') {
      kept.push(entry);
      continue;
    }

    const raw = entry.content || entry.summary || entry.value || '';
    const text = typeof raw === 'string' ? raw : JSON.stringify(raw);
    // Nothing to compare on: keep the entry instead of folding every
    // content-less entry into one shared bucket.
    if (typeof text !== 'string' || text.trim().length === 0) {
      kept.push(entry);
      continue;
    }

    const fp = fingerprintContent(text);
    const slot = slotByFingerprint.get(fp);
    if (slot === undefined) {
      slotByFingerprint.set(fp, kept.length);
      kept.push(entry);
      continue;
    }

    const existing = kept[slot];
    const existingAccess = existing.accessCount || 0;
    const candidateAccess = entry.accessCount || 0;
    if (candidateAccess > existingAccess) {
      kept[slot] = entry;
      continue;
    }
    if (candidateAccess === existingAccess) {
      const existingAt = toMillis(existing.createdAt);
      const candidateAt = toMillis(entry.createdAt);
      if (
        Number.isFinite(existingAt) &&
        Number.isFinite(candidateAt) &&
        candidateAt < existingAt
      ) {
        kept[slot] = entry;
      }
    }
  }

  return kept;
}
|
|
151
|
+
|
|
111
152
|
// ── Session state helpers ────────────────────────────────────────────────────
|
|
112
153
|
|
|
113
154
|
function sessionGet(key) {
|
|
@@ -217,14 +258,24 @@ function buildEdges(entries) {
|
|
|
217
258
|
}
|
|
218
259
|
}
|
|
219
260
|
|
|
220
|
-
// Similarity edges within categories (Jaccard > 0.3)
|
|
261
|
+
// Similarity edges within categories (Jaccard > 0.3).
|
|
262
|
+
// ADR-095 G6 perf: hoist the trigram computation outside the inner
|
|
263
|
+
// loop. Previously we re-tokenized + re-trigrammed group[j] for every
|
|
264
|
+
// i — O(n²) extra work for nothing. Now compute once per entry.
|
|
221
265
|
for (const cat of Object.keys(byCategory)) {
|
|
222
266
|
const group = byCategory[cat];
|
|
267
|
+
if (group.length < 2) continue;
|
|
268
|
+
|
|
269
|
+
// Cache trigram sets for every entry in the group.
|
|
270
|
+
const triCache = new Array(group.length);
|
|
271
|
+
for (let i = 0; i < group.length; i++) {
|
|
272
|
+
triCache[i] = trigrams(tokenize(group[i].content || group[i].summary || ''));
|
|
273
|
+
}
|
|
274
|
+
|
|
223
275
|
for (let i = 0; i < group.length; i++) {
|
|
224
|
-
const triA =
|
|
276
|
+
const triA = triCache[i];
|
|
225
277
|
for (let j = i + 1; j < group.length; j++) {
|
|
226
|
-
const
|
|
227
|
-
const sim = jaccardSimilarity(triA, triB);
|
|
278
|
+
const sim = jaccardSimilarity(triA, triCache[j]);
|
|
228
279
|
if (sim > 0.3) {
|
|
229
280
|
edges.push({
|
|
230
281
|
sourceId: group[i].id,
|
|
@@ -340,9 +391,21 @@ function init() {
|
|
|
340
391
|
}
|
|
341
392
|
|
|
342
393
|
// Deduplicate store entries by ID (fixes #1518 — 194MB → ~79KB)
|
|
343
|
-
|
|
394
|
+
let deduped = deduplicateById(store);
|
|
395
|
+
// ADR-095 G6: also dedupe by content fingerprint. The April audit
|
|
396
|
+
// measured 5,706 entries with only ~20 unique by content because the
|
|
397
|
+
// same MEMORY.md sections get imported from sibling project dirs with
|
|
398
|
+
// different IDs. deduplicateById can't catch that; deduplicateByContent
|
|
399
|
+
// can. Cuts the graph from O(n²) over near-identical duplicates down
|
|
400
|
+
// to O(unique²), which is the difference between a 100MB graph-state
|
|
401
|
+
// and a kilobytes-scale one for typical workloads.
|
|
402
|
+
const beforeContentDedup = deduped.length;
|
|
403
|
+
deduped = deduplicateByContent(deduped);
|
|
344
404
|
if (deduped.length < store.length) {
|
|
345
|
-
process.stderr.write(
|
|
405
|
+
process.stderr.write(
|
|
406
|
+
`[INTELLIGENCE] Deduped store: ${store.length} -> ${deduped.length} entries ` +
|
|
407
|
+
`(by-id: ${store.length - beforeContentDedup} dropped, by-content: ${beforeContentDedup - deduped.length} dropped)\n`
|
|
408
|
+
);
|
|
346
409
|
writeJSON(STORE_PATH, deduped);
|
|
347
410
|
}
|
|
348
411
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@claude-flow/cli",
|
|
3
|
-
"version": "3.6.19",
|
|
3
|
+
"version": "3.6.20",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Ruflo CLI - Enterprise AI agent orchestration with 60+ specialized agents, swarm coordination, MCP server, self-learning hooks, and vector memory for Claude Code",
|
|
6
6
|
"main": "dist/src/index.js",
|