@icex-labs/openclaw-memory-engine 5.2.0 → 5.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +2 -10
- package/lib/dedup.js +39 -15
- package/lib/graph.js +7 -1
- package/lib/paths.js +1 -1
- package/package.json +1 -1
- package/setup.sh +63 -17
package/index.js
CHANGED
|
@@ -147,16 +147,8 @@ export default definePluginEntry({
|
|
|
147
147
|
} catch { /* don't break message flow */ }
|
|
148
148
|
}, { name: "memory-engine-capture-received", description: "Auto-capture facts from incoming messages" });
|
|
149
149
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
const ctx = event.context;
|
|
153
|
-
if (!ctx?.content || !ctx?.success) return;
|
|
154
|
-
if (ctx.content.length < 50) return;
|
|
155
|
-
const agentId = extractAgentId(event.sessionKey);
|
|
156
|
-
const wsDir = resolveWorkspace({ agentId });
|
|
157
|
-
captureMessage(wsDir, ctx.content, "agent-reply");
|
|
158
|
-
} catch { /* don't break message flow */ }
|
|
159
|
-
}, { name: "memory-engine-capture-sent", description: "Auto-capture facts from agent replies" });
|
|
150
|
+
// message:sent hook removed — agent replies are restatements, not new facts.
|
|
151
|
+
// Only user messages (message:received) are auto-captured.
|
|
160
152
|
|
|
161
153
|
// ─── core_memory_read ───
|
|
162
154
|
api.registerTool(withAgent((agentId) => ({
|
package/lib/dedup.js
CHANGED
|
@@ -1,22 +1,45 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Deduplication via embedding cosine similarity.
|
|
3
|
+
* v5.2: smarter dedup — ignores records with different numbers/dates/IDs.
|
|
3
4
|
*/
|
|
4
5
|
|
|
5
6
|
import { loadArchival, rewriteArchival } from "./archival.js";
|
|
6
7
|
import {
|
|
7
8
|
loadEmbeddingCache, saveEmbeddingCache, getEmbedding, cosineSimilarity,
|
|
8
9
|
} from "./embedding.js";
|
|
9
|
-
|
|
10
|
+
|
|
11
|
+
// Raised from 0.92 to 0.96 — fewer false positives
|
|
12
|
+
const DEDUP_THRESHOLD = 0.96;
|
|
13
|
+
|
|
14
|
+
/**
 * Extract numbers, dates, and IDs from text for comparison.
 * Two records with different numbers are NOT duplicates even if semantically similar.
 *
 * Tokens are returned lower-cased, in the order: numbers, dates, IDs.
 *
 * - Numbers must start and end with a digit, so bare punctuation ("." or ",")
 *   and sentence-ending periods ("500.") never produce or alter a token.
 *   A leading "$" and thousands separators are stripped; a trailing "%" is kept.
 * - Dates are ISO-style `YYYY-MM-DD`. Their components are NOT also reported as
 *   bare numbers; otherwise two different dates in the same year would share
 *   the year token and look like matching identifiers.
 * - IDs are `#123`, `PR 123` / `PR #123`, `U12345` (5+ digits) and `IMM-123`.
 *   Their digits are still reported as numbers as well, so differently
 *   formatted references to the same ID ("PR #12" / "PR#12") keep a shared token.
 *
 * @param {string} text - Record content to scan. Non-string input yields [].
 * @returns {string[]} Lower-cased identifier tokens (may contain repeats).
 */
function extractIdentifiers(text) {
  // Records without usable content simply have no identifiers.
  const source = typeof text === "string" ? text : "";

  const datePattern = /\d{4}-\d{2}-\d{2}/g;
  const dates = source.match(datePattern) || [];
  const ids = source.match(/#\d+|PR\s*#?\d+|U\d{5,}|IMM-\d+/gi) || [];

  // Blank out dates before scanning for numbers so "2024-01-15" does not
  // additionally contribute "2024", "01" and "15".
  const withoutDates = source.replace(datePattern, " ");

  // Anchor on digits at both ends: matches "1,234.56", "$500", "95%", "5.2.0",
  // but not a lone "." / "," and not the period that ends a sentence.
  const numbers = (withoutDates.match(/\$?\d(?:[\d,.]*\d)?%?/g) || []).map((n) =>
    n.replace(/[,$]/g, ""),
  );

  return [...numbers, ...dates, ...ids].map((s) => s.toLowerCase());
}
|
|
24
|
+
|
|
25
|
+
/**
 * Decide whether two texts carry mutually exclusive identifiers.
 *
 * Returns true only when both texts yield at least one identifier and the two
 * identifier lists have no token in common. If either text has no identifiers
 * the answer is false (nothing to contradict).
 *
 * @param {string} a - First record content.
 * @param {string} b - Second record content.
 * @returns {boolean} True when the identifier sets are both non-empty and disjoint.
 */
function hasDifferentIdentifiers(a, b) {
  const left = extractIdentifiers(a);
  const right = extractIdentifiers(b);

  // One side without identifiers → cannot claim they differ.
  if (left.length === 0 || right.length === 0) {
    return false;
  }

  // Disjoint means no token from the left appears on the right.
  const rightLookup = new Set(right);
  return !left.some((token) => rightLookup.has(token));
}
|
|
10
35
|
|
|
11
36
|
/**
|
|
12
37
|
* Find near-duplicate pairs in archival memory.
|
|
13
|
-
* @returns {Array<{ keep: object, drop: object, similarity: number }>}
|
|
14
38
|
*/
|
|
15
39
|
export async function findDuplicates(ws) {
|
|
16
40
|
const records = loadArchival(ws);
|
|
17
41
|
const embCache = loadEmbeddingCache(ws);
|
|
18
42
|
|
|
19
|
-
// Build missing embeddings
|
|
20
43
|
for (const r of records) {
|
|
21
44
|
if (!embCache[r.id]) {
|
|
22
45
|
const emb = await getEmbedding(
|
|
@@ -27,7 +50,6 @@ export async function findDuplicates(ws) {
|
|
|
27
50
|
}
|
|
28
51
|
saveEmbeddingCache(ws);
|
|
29
52
|
|
|
30
|
-
// O(n²) pairwise comparison
|
|
31
53
|
const dupes = [];
|
|
32
54
|
for (let i = 0; i < records.length; i++) {
|
|
33
55
|
for (let j = i + 1; j < records.length; j++) {
|
|
@@ -35,16 +57,19 @@ export async function findDuplicates(ws) {
|
|
|
35
57
|
const embB = embCache[records[j].id];
|
|
36
58
|
if (!embA || !embB) continue;
|
|
37
59
|
const sim = cosineSimilarity(embA, embB);
|
|
38
|
-
if (sim
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
60
|
+
if (sim < DEDUP_THRESHOLD) continue;
|
|
61
|
+
|
|
62
|
+
// Smart check: if records contain different numbers/dates/IDs, skip
|
|
63
|
+
if (hasDifferentIdentifiers(records[i].content, records[j].content)) continue;
|
|
64
|
+
|
|
65
|
+
const keepIdx =
|
|
66
|
+
(records[j].access_count || 0) >= (records[i].access_count || 0) ? j : i;
|
|
67
|
+
const dropIdx = keepIdx === i ? j : i;
|
|
68
|
+
dupes.push({
|
|
69
|
+
keep: records[keepIdx],
|
|
70
|
+
drop: records[dropIdx],
|
|
71
|
+
similarity: Math.round(sim * 1000) / 1000,
|
|
72
|
+
});
|
|
48
73
|
}
|
|
49
74
|
}
|
|
50
75
|
return dupes;
|
|
@@ -52,7 +77,6 @@ export async function findDuplicates(ws) {
|
|
|
52
77
|
|
|
53
78
|
/**
|
|
54
79
|
* Remove duplicate records from archival.
|
|
55
|
-
* @returns {{ removed: number, remaining: number }}
|
|
56
80
|
*/
|
|
57
81
|
export function applyDedup(ws, dupes) {
|
|
58
82
|
const records = loadArchival(ws);
|
package/lib/graph.js
CHANGED
|
@@ -28,11 +28,17 @@ export function loadGraph(ws) {
|
|
|
28
28
|
export function addTriple(ws, subject, relation, object, sourceId = null) {
|
|
29
29
|
const triples = loadGraph(ws);
|
|
30
30
|
|
|
31
|
+
// Case-insensitive dedup to prevent "Edmonton" vs "edmonton" duplicates
|
|
31
32
|
const exists = triples.some(
|
|
32
|
-
(t) => t.s === subject
|
|
33
|
+
(t) => t.s.toLowerCase() === subject.toLowerCase() &&
|
|
34
|
+
t.r.toLowerCase() === relation.toLowerCase() &&
|
|
35
|
+
t.o.toLowerCase() === object.toLowerCase(),
|
|
33
36
|
);
|
|
34
37
|
if (exists) return null;
|
|
35
38
|
|
|
39
|
+
// Reject if subject or object is too long (garbage prevention)
|
|
40
|
+
if (subject.length > 30 || object.length > 40) return null;
|
|
41
|
+
|
|
36
42
|
const triple = {
|
|
37
43
|
id: `tri-${Date.now()}-${Math.random().toString(36).slice(2, 6)}`,
|
|
38
44
|
s: subject,
|
package/lib/paths.js
CHANGED
|
@@ -9,7 +9,7 @@ export const DEFAULT_TOP_K = 5;
|
|
|
9
9
|
export const MAX_TOP_K = 20;
|
|
10
10
|
export const EMBEDDING_MODEL = "text-embedding-3-small";
|
|
11
11
|
export const EMBEDDING_DIM = 512;
|
|
12
|
-
export const DEDUP_SIMILARITY_THRESHOLD = 0.92
|
|
12
|
+
export const DEDUP_SIMILARITY_THRESHOLD = 0.96; // raised from 0.92 to reduce false positives
|
|
13
13
|
|
|
14
14
|
import { readFileSync as _readFileSync, existsSync as _existsSync } from "node:fs";
|
|
15
15
|
|
package/package.json
CHANGED
package/setup.sh
CHANGED
|
@@ -384,22 +384,53 @@ except:
|
|
|
384
384
|
|
|
385
385
|
echo " Agents found: $AGENTS"
|
|
386
386
|
|
|
387
|
-
# Register
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
387
|
+
# Register crons from JSON definition file (single source of truth)
|
|
388
|
+
CRON_JSON="$PLUGIN_DIR/extras/auto-consolidation-crons.json"
|
|
389
|
+
if [ -f "$CRON_JSON" ] && command -v python3 &>/dev/null; then
|
|
390
|
+
python3 -c "
|
|
391
|
+
import json, subprocess, sys
|
|
392
|
+
|
|
393
|
+
with open('$CRON_JSON') as f:
|
|
394
|
+
crons = json.load(f).get('crons', [])
|
|
395
|
+
|
|
396
|
+
existing = '''$EXISTING_CRONS'''
|
|
397
|
+
tz = '''$TZ_IANA'''
|
|
398
|
+
|
|
399
|
+
for c in crons:
|
|
400
|
+
name = c['id']
|
|
401
|
+
if name in existing:
|
|
402
|
+
print(f'⏭️ Cron \"{name}\" already exists')
|
|
403
|
+
continue
|
|
404
|
+
agent = c.get('agent', 'main')
|
|
405
|
+
cmd = [
|
|
406
|
+
'openclaw', 'cron', 'add',
|
|
407
|
+
'--name', name,
|
|
408
|
+
'--cron', c['schedule'],
|
|
409
|
+
'--tz', tz,
|
|
410
|
+
'--agent', agent,
|
|
411
|
+
'--session', 'isolated',
|
|
412
|
+
'--model', c.get('model', 'anthropic/claude-sonnet-4-6'),
|
|
413
|
+
'--message', c['message'],
|
|
414
|
+
'--description', c.get('description', ''),
|
|
415
|
+
'--timeout', '60000',
|
|
416
|
+
]
|
|
417
|
+
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
418
|
+
if result.returncode == 0:
|
|
419
|
+
print(f'✅ Cron \"{name}\" ({agent}) registered')
|
|
420
|
+
else:
|
|
421
|
+
print(f'⚠️ Cron \"{name}\" failed (gateway not running?)')
|
|
422
|
+
" 2>/dev/null
|
|
423
|
+
else
|
|
424
|
+
echo "⚠️ Cron JSON not found or python3 missing — registering defaults manually"
|
|
425
|
+
register_cron "memory-reflect-daily" "0 9 * * *" "main" \
|
|
426
|
+
"Run memory_reflect. Do NOT output to main chat." "Daily reflection"
|
|
427
|
+
register_cron "memory-consolidate-6h" "0 */6 * * *" "main" \
|
|
428
|
+
"Run memory_consolidate on today's daily log. Do NOT output to main chat." "Auto-consolidate"
|
|
429
|
+
register_cron "memory-dedup-weekly" "0 4 * * 0" "main" \
|
|
430
|
+
"Run archival_deduplicate with apply=true. Do NOT output to main chat." "Weekly dedup"
|
|
431
|
+
register_cron "memory-dashboard-daily" "30 9 * * *" "main" \
|
|
432
|
+
"Run memory_dashboard. Do NOT output to main chat." "Dashboard refresh" 30000
|
|
433
|
+
fi
|
|
403
434
|
|
|
404
435
|
# Register per-agent crons for agents with separate workspaces
|
|
405
436
|
STAGGER=0
|
|
@@ -441,7 +472,22 @@ else
|
|
|
441
472
|
echo "⚠️ openclaw CLI not found — skipping cron registration"
|
|
442
473
|
fi
|
|
443
474
|
|
|
444
|
-
# --- 9. Validate config ---
|
|
475
|
+
# --- 9. Track installed version ---
|
|
476
|
+
INSTALLED_VERSION=$(python3 -c "
|
|
477
|
+
import json
|
|
478
|
+
with open('$PLUGIN_DIR/package.json') as f: print(json.load(f).get('version','unknown'))
|
|
479
|
+
" 2>/dev/null || echo "unknown")
|
|
480
|
+
PREV_VERSION=""
|
|
481
|
+
VERSION_FILE="$MEMORY_DIR/.memory-engine-version"
|
|
482
|
+
[ -f "$VERSION_FILE" ] && PREV_VERSION=$(cat "$VERSION_FILE")
|
|
483
|
+
echo "$INSTALLED_VERSION" > "$VERSION_FILE"
|
|
484
|
+
|
|
485
|
+
if [ -n "$PREV_VERSION" ] && [ "$PREV_VERSION" != "$INSTALLED_VERSION" ]; then
|
|
486
|
+
echo ""
|
|
487
|
+
echo "📦 Upgraded: $PREV_VERSION → $INSTALLED_VERSION"
|
|
488
|
+
fi
|
|
489
|
+
|
|
490
|
+
# --- 10. Validate config ---
|
|
445
491
|
echo ""
|
|
446
492
|
if command -v openclaw &>/dev/null; then
|
|
447
493
|
openclaw config validate 2>&1 && echo "✅ Config valid" || echo "❌ Config validation failed"
|