@icex-labs/openclaw-memory-engine 5.2.1 → 5.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +2 -10
- package/lib/dedup.js +39 -15
- package/lib/graph.js +7 -1
- package/lib/paths.js +1 -1
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -147,16 +147,8 @@ export default definePluginEntry({
|
|
|
147
147
|
} catch { /* don't break message flow */ }
|
|
148
148
|
}, { name: "memory-engine-capture-received", description: "Auto-capture facts from incoming messages" });
|
|
149
149
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
const ctx = event.context;
|
|
153
|
-
if (!ctx?.content || !ctx?.success) return;
|
|
154
|
-
if (ctx.content.length < 50) return;
|
|
155
|
-
const agentId = extractAgentId(event.sessionKey);
|
|
156
|
-
const wsDir = resolveWorkspace({ agentId });
|
|
157
|
-
captureMessage(wsDir, ctx.content, "agent-reply");
|
|
158
|
-
} catch { /* don't break message flow */ }
|
|
159
|
-
}, { name: "memory-engine-capture-sent", description: "Auto-capture facts from agent replies" });
|
|
150
|
+
// message:sent hook removed — agent replies are restatements, not new facts.
|
|
151
|
+
// Only user messages (message:received) are auto-captured.
|
|
160
152
|
|
|
161
153
|
// ─── core_memory_read ───
|
|
162
154
|
api.registerTool(withAgent((agentId) => ({
|
package/lib/dedup.js
CHANGED
|
@@ -1,22 +1,45 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Deduplication via embedding cosine similarity.
|
|
3
|
+
* v5.2: smarter dedup — ignores records with different numbers/dates/IDs.
|
|
3
4
|
*/
|
|
4
5
|
|
|
5
6
|
import { loadArchival, rewriteArchival } from "./archival.js";
|
|
6
7
|
import {
|
|
7
8
|
loadEmbeddingCache, saveEmbeddingCache, getEmbedding, cosineSimilarity,
|
|
8
9
|
} from "./embedding.js";
|
|
9
|
-
|
|
10
|
+
|
|
11
|
+
// Raised from 0.92 to 0.96 — fewer false positives
|
|
12
|
+
const DEDUP_THRESHOLD = 0.96;
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Extract numbers, dates, and IDs from text for comparison.
|
|
16
|
+
* Two records with different numbers are NOT duplicates even if semantically similar.
|
|
17
|
+
*/
|
|
18
|
+
function extractIdentifiers(text) {
|
|
19
|
+
const numbers = (text.match(/\$?[\d,.]+%?/g) || []).map((n) => n.replace(/[,$]/g, ""));
|
|
20
|
+
const dates = text.match(/\d{4}-\d{2}-\d{2}/g) || [];
|
|
21
|
+
const ids = text.match(/#\d+|PR\s*#?\d+|U\d{5,}|IMM-\d+/gi) || [];
|
|
22
|
+
return [...numbers, ...dates, ...ids].map((s) => s.toLowerCase());
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
function hasDifferentIdentifiers(a, b) {
|
|
26
|
+
const idsA = extractIdentifiers(a);
|
|
27
|
+
const idsB = extractIdentifiers(b);
|
|
28
|
+
if (idsA.length === 0 || idsB.length === 0) return false;
|
|
29
|
+
// If both have identifiers but they differ → not duplicates
|
|
30
|
+
const setA = new Set(idsA);
|
|
31
|
+
const setB = new Set(idsB);
|
|
32
|
+
const overlap = [...setA].filter((x) => setB.has(x)).length;
|
|
33
|
+
return overlap === 0 && idsA.length > 0 && idsB.length > 0;
|
|
34
|
+
}
|
|
10
35
|
|
|
11
36
|
/**
|
|
12
37
|
* Find near-duplicate pairs in archival memory.
|
|
13
|
-
* @returns {Array<{ keep: object, drop: object, similarity: number }>}
|
|
14
38
|
*/
|
|
15
39
|
export async function findDuplicates(ws) {
|
|
16
40
|
const records = loadArchival(ws);
|
|
17
41
|
const embCache = loadEmbeddingCache(ws);
|
|
18
42
|
|
|
19
|
-
// Build missing embeddings
|
|
20
43
|
for (const r of records) {
|
|
21
44
|
if (!embCache[r.id]) {
|
|
22
45
|
const emb = await getEmbedding(
|
|
@@ -27,7 +50,6 @@ export async function findDuplicates(ws) {
|
|
|
27
50
|
}
|
|
28
51
|
saveEmbeddingCache(ws);
|
|
29
52
|
|
|
30
|
-
// O(n²) pairwise comparison
|
|
31
53
|
const dupes = [];
|
|
32
54
|
for (let i = 0; i < records.length; i++) {
|
|
33
55
|
for (let j = i + 1; j < records.length; j++) {
|
|
@@ -35,16 +57,19 @@ export async function findDuplicates(ws) {
|
|
|
35
57
|
const embB = embCache[records[j].id];
|
|
36
58
|
if (!embA || !embB) continue;
|
|
37
59
|
const sim = cosineSimilarity(embA, embB);
|
|
38
|
-
if (sim
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
60
|
+
if (sim < DEDUP_THRESHOLD) continue;
|
|
61
|
+
|
|
62
|
+
// Smart check: if records contain different numbers/dates/IDs, skip
|
|
63
|
+
if (hasDifferentIdentifiers(records[i].content, records[j].content)) continue;
|
|
64
|
+
|
|
65
|
+
const keepIdx =
|
|
66
|
+
(records[j].access_count || 0) >= (records[i].access_count || 0) ? j : i;
|
|
67
|
+
const dropIdx = keepIdx === i ? j : i;
|
|
68
|
+
dupes.push({
|
|
69
|
+
keep: records[keepIdx],
|
|
70
|
+
drop: records[dropIdx],
|
|
71
|
+
similarity: Math.round(sim * 1000) / 1000,
|
|
72
|
+
});
|
|
48
73
|
}
|
|
49
74
|
}
|
|
50
75
|
return dupes;
|
|
@@ -52,7 +77,6 @@ export async function findDuplicates(ws) {
|
|
|
52
77
|
|
|
53
78
|
/**
|
|
54
79
|
* Remove duplicate records from archival.
|
|
55
|
-
* @returns {{ removed: number, remaining: number }}
|
|
56
80
|
*/
|
|
57
81
|
export function applyDedup(ws, dupes) {
|
|
58
82
|
const records = loadArchival(ws);
|
package/lib/graph.js
CHANGED
|
@@ -28,11 +28,17 @@ export function loadGraph(ws) {
|
|
|
28
28
|
export function addTriple(ws, subject, relation, object, sourceId = null) {
|
|
29
29
|
const triples = loadGraph(ws);
|
|
30
30
|
|
|
31
|
+
// Case-insensitive dedup to prevent "Edmonton" vs "edmonton" duplicates
|
|
31
32
|
const exists = triples.some(
|
|
32
|
-
(t) => t.s === subject && t.r === relation && t.o === object,
|
|
33
|
+
(t) => t.s.toLowerCase() === subject.toLowerCase() &&
|
|
34
|
+
t.r.toLowerCase() === relation.toLowerCase() &&
|
|
35
|
+
t.o.toLowerCase() === object.toLowerCase(),
|
|
33
36
|
);
|
|
34
37
|
if (exists) return null;
|
|
35
38
|
|
|
39
|
+
// Reject if subject or object is too long (garbage prevention)
|
|
40
|
+
if (subject.length > 30 || object.length > 40) return null;
|
|
41
|
+
|
|
36
42
|
const triple = {
|
|
37
43
|
id: `tri-${Date.now()}-${Math.random().toString(36).slice(2, 6)}`,
|
|
38
44
|
s: subject,
|
package/lib/paths.js
CHANGED
|
@@ -9,7 +9,7 @@ export const DEFAULT_TOP_K = 5;
|
|
|
9
9
|
export const MAX_TOP_K = 20;
|
|
10
10
|
export const EMBEDDING_MODEL = "text-embedding-3-small";
|
|
11
11
|
export const EMBEDDING_DIM = 512;
|
|
12
|
-
export const DEDUP_SIMILARITY_THRESHOLD = 0.92
|
|
12
|
+
export const DEDUP_SIMILARITY_THRESHOLD = 0.96; // raised from 0.92 to reduce false positives
|
|
13
13
|
|
|
14
14
|
import { readFileSync as _readFileSync, existsSync as _existsSync } from "node:fs";
|
|
15
15
|
|
package/package.json
CHANGED