@icex-labs/openclaw-memory-engine 5.2.1 → 5.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -147,16 +147,8 @@ export default definePluginEntry({
147
147
  } catch { /* don't break message flow */ }
148
148
  }, { name: "memory-engine-capture-received", description: "Auto-capture facts from incoming messages" });
149
149
 
150
- api.registerHook("message:sent", (event) => {
151
- try {
152
- const ctx = event.context;
153
- if (!ctx?.content || !ctx?.success) return;
154
- if (ctx.content.length < 50) return;
155
- const agentId = extractAgentId(event.sessionKey);
156
- const wsDir = resolveWorkspace({ agentId });
157
- captureMessage(wsDir, ctx.content, "agent-reply");
158
- } catch { /* don't break message flow */ }
159
- }, { name: "memory-engine-capture-sent", description: "Auto-capture facts from agent replies" });
150
+ // message:sent hook removed — agent replies are restatements, not new facts.
151
+ // Only user messages (message:received) are auto-captured.
160
152
 
161
153
  // ─── core_memory_read ───
162
154
  api.registerTool(withAgent((agentId) => ({
package/lib/dedup.js CHANGED
@@ -1,22 +1,45 @@
1
1
  /**
2
2
  * Deduplication via embedding cosine similarity.
3
+ * v5.2: smarter dedup — ignores records with different numbers/dates/IDs.
3
4
  */
4
5
 
5
6
  import { loadArchival, rewriteArchival } from "./archival.js";
6
7
  import {
7
8
  loadEmbeddingCache, saveEmbeddingCache, getEmbedding, cosineSimilarity,
8
9
  } from "./embedding.js";
9
- import { DEDUP_SIMILARITY_THRESHOLD } from "./paths.js";
10
+
11
+ // Raised from 0.92 to 0.96 — fewer false positives. Mirrors DEDUP_SIMILARITY_THRESHOLD in ./paths.js; keep the two values in sync.
12
+ const DEDUP_THRESHOLD = 0.96;
13
+
14
/**
 * Extract numbers, dates, and IDs from text for comparison.
 * Two records with different numbers are NOT duplicates even if semantically similar.
 *
 * @param {string} text - Record content to scan. Non-string or empty input yields [].
 * @returns {string[]} Lower-cased tokens in this order: numbers (with "$" and ","
 *   stripped, "%" kept), ISO dates (YYYY-MM-DD), then IDs (#123, PR #12, U12345, IMM-123).
 */
function extractIdentifiers(text) {
  // Records without textual content have no identifiers; do not throw on .match().
  if (typeof text !== "string" || text.length === 0) return [];

  // A number must START with a digit, and a "." only counts when digits follow it.
  // This keeps sentence punctuation ("." / "," / "...") from being reported as an
  // identifier, and keeps a trailing full stop from being glued onto a number
  // ("500." vs "500"). Dotted sequences such as "5.2.1" stay a single token.
  const numbers = (text.match(/\$?\d[\d,]*(?:\.\d+)*%?/g) || []).map((n) =>
    n.replace(/[,$]/g, ""),
  );
  const dates = text.match(/\d{4}-\d{2}-\d{2}/g) || [];
  const ids = text.match(/#\d+|PR\s*#?\d+|U\d{5,}|IMM-\d+/gi) || [];

  return [...numbers, ...dates, ...ids].map((s) => s.toLowerCase());
}
24
+
25
/**
 * Decide whether two texts carry entirely disjoint identifier sets.
 *
 * Returns false when either text yields no identifiers (nothing to compare),
 * and false when at least one identifier is common to both texts. Returns
 * true only when both texts have identifiers and none of them is shared.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @returns {boolean} true when both sides have identifiers and they do not overlap.
 */
function hasDifferentIdentifiers(a, b) {
  const left = extractIdentifiers(a);
  const right = extractIdentifiers(b);

  // Without identifiers on both sides there is no evidence the records differ.
  if (!left.length || !right.length) return false;

  // Any shared identifier means the records may still be duplicates.
  const rightLookup = new Set(right);
  const sharesIdentifier = left.some((token) => rightLookup.has(token));
  return !sharesIdentifier;
}
10
35
 
11
36
  /**
12
37
  * Find near-duplicate pairs in archival memory.
13
- * @returns {Array<{ keep: object, drop: object, similarity: number }>}
14
38
  */
15
39
  export async function findDuplicates(ws) {
16
40
  const records = loadArchival(ws);
17
41
  const embCache = loadEmbeddingCache(ws);
18
42
 
19
- // Build missing embeddings
20
43
  for (const r of records) {
21
44
  if (!embCache[r.id]) {
22
45
  const emb = await getEmbedding(
@@ -27,7 +50,6 @@ export async function findDuplicates(ws) {
27
50
  }
28
51
  saveEmbeddingCache(ws);
29
52
 
30
- // O(n²) pairwise comparison
31
53
  const dupes = [];
32
54
  for (let i = 0; i < records.length; i++) {
33
55
  for (let j = i + 1; j < records.length; j++) {
@@ -35,16 +57,19 @@ export async function findDuplicates(ws) {
35
57
  const embB = embCache[records[j].id];
36
58
  if (!embA || !embB) continue;
37
59
  const sim = cosineSimilarity(embA, embB);
38
- if (sim >= DEDUP_SIMILARITY_THRESHOLD) {
39
- const keepIdx =
40
- (records[j].access_count || 0) >= (records[i].access_count || 0) ? j : i;
41
- const dropIdx = keepIdx === i ? j : i;
42
- dupes.push({
43
- keep: records[keepIdx],
44
- drop: records[dropIdx],
45
- similarity: Math.round(sim * 1000) / 1000,
46
- });
47
- }
60
+ if (sim < DEDUP_THRESHOLD) continue;
61
+
62
+ // Smart check: if records contain different numbers/dates/IDs, skip
63
+ if (hasDifferentIdentifiers(records[i].content, records[j].content)) continue;
64
+
65
+ const keepIdx =
66
+ (records[j].access_count || 0) >= (records[i].access_count || 0) ? j : i;
67
+ const dropIdx = keepIdx === i ? j : i;
68
+ dupes.push({
69
+ keep: records[keepIdx],
70
+ drop: records[dropIdx],
71
+ similarity: Math.round(sim * 1000) / 1000,
72
+ });
48
73
  }
49
74
  }
50
75
  return dupes;
@@ -52,7 +77,6 @@ export async function findDuplicates(ws) {
52
77
 
53
78
  /**
54
79
  * Remove duplicate records from archival.
55
- * @returns {{ removed: number, remaining: number }}
56
80
  */
57
81
  export function applyDedup(ws, dupes) {
58
82
  const records = loadArchival(ws);
package/lib/graph.js CHANGED
@@ -28,11 +28,17 @@ export function loadGraph(ws) {
28
28
  export function addTriple(ws, subject, relation, object, sourceId = null) {
29
29
  const triples = loadGraph(ws);
30
30
 
31
+ // Case-insensitive dedup to prevent "Edmonton" vs "edmonton" duplicates
31
32
  const exists = triples.some(
32
- (t) => t.s === subject && t.r === relation && t.o === object,
33
+ (t) => t.s.toLowerCase() === subject.toLowerCase() &&
34
+ t.r.toLowerCase() === relation.toLowerCase() &&
35
+ t.o.toLowerCase() === object.toLowerCase(),
33
36
  );
34
37
  if (exists) return null;
35
38
 
39
+ // Reject if subject or object is too long (garbage prevention)
40
+ if (subject.length > 30 || object.length > 40) return null;
41
+
36
42
  const triple = {
37
43
  id: `tri-${Date.now()}-${Math.random().toString(36).slice(2, 6)}`,
38
44
  s: subject,
package/lib/paths.js CHANGED
@@ -9,7 +9,7 @@ export const DEFAULT_TOP_K = 5;
9
9
  export const MAX_TOP_K = 20;
10
10
  export const EMBEDDING_MODEL = "text-embedding-3-small";
11
11
  export const EMBEDDING_DIM = 512;
12
- export const DEDUP_SIMILARITY_THRESHOLD = 0.92;
12
+ export const DEDUP_SIMILARITY_THRESHOLD = 0.96; // raised from 0.92 to reduce false positives
13
13
 
14
14
  import { readFileSync as _readFileSync, existsSync as _existsSync } from "node:fs";
15
15
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@icex-labs/openclaw-memory-engine",
3
- "version": "5.2.1",
3
+ "version": "5.3.0",
4
4
  "description": "MemGPT-style hierarchical memory plugin for OpenClaw — core memory block + archival storage with semantic search",
5
5
  "type": "module",
6
6
  "main": "index.js",