@icex-labs/openclaw-memory-engine 3.3.2 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extras/migrate-legacy.mjs +165 -0
- package/index.js +21 -1
- package/lib/embedding.js +64 -0
- package/package.json +1 -1
- package/setup.sh +34 -0
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* migrate-legacy.mjs β Import existing file-based memory into archival.jsonl
|
|
4
|
+
*
|
|
5
|
+
* Scans workspace for: MEMORY.md, memory/*.md, memory/weekly/*.md, memory/topics/*.md
|
|
6
|
+
* Extracts facts, deduplicates, and appends to memory/archival.jsonl.
|
|
7
|
+
*
|
|
8
|
+
* Usage: node migrate-legacy.mjs [workspace_path]
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { readFileSync, appendFileSync, existsSync, readdirSync } from "node:fs";
|
|
12
|
+
import { join, basename } from "node:path";
|
|
13
|
+
|
|
14
|
+
// Workspace resolution: CLI arg > env var > ~/.openclaw/workspace fallback.
const WS = process.argv[2] || process.env.OPENCLAW_WORKSPACE || join(process.env.HOME || "/tmp", ".openclaw", "workspace");
const ARCHIVAL = join(WS, "memory", "archival.jsonl");

console.log(`π§ Legacy memory migration`);
console.log(`   Workspace: ${WS}`);
console.log(`   Archival:  ${ARCHIVAL}`);
console.log(``);

// Load existing archival content (lowercased) for dedup.
const existingContent = new Set();
if (existsSync(ARCHIVAL)) {
  for (const line of readFileSync(ARCHIVAL, "utf-8").trim().split("\n").filter(Boolean)) {
    // BUG FIX: only index records whose content is actually a string.
    // The old `.content?.toLowerCase()` added `undefined` to the Set for
    // records without content, which later crashed the keyword-overlap
    // dedup pass when it called `.split()` on the entry.
    try {
      const content = JSON.parse(line).content;
      if (typeof content === "string") existingContent.add(content.toLowerCase());
    } catch { /* skip malformed JSONL lines */ }
  }
}
console.log(`Existing archival: ${existingContent.size} records`);
|
|
30
|
+
|
|
31
|
+
// Topic buckets inferred from keyword patterns (no personal data involved).
// Order matters: the first matching pattern wins.
const ENTITY_PATTERNS = [
  [/\b(IBKR|Interactive Brokers|NAV|portfolio|ζθ΅|HELOC|mortgage|finance)/i, "finance"],
  [/\b(immigration|PR|IRCC|CBSA|visa|εΎεΈ|lawyer|petition)/i, "immigration"],
  [/\b(quant|trading|backtest|signal|portfolio|Sharpe)/i, "trading"],
  [/\b(doctor|ε»η|hospital|health|medication|θ―|δ½ζ£|clinic)/i, "health"],
  [/\b(car|vehicle|SUV|sedan|truck)\b/i, "vehicles"],
  [/\b(k3d|ArgoCD|Helm|kubectl|GitOps|cluster|deploy|CI|CD)/i, "infrastructure"],
  [/\b(OpenClaw|gateway|plugin|session|agent|memory|compaction)/i, "openclaw"],
  [/\b(Discord|Telegram|Slack|bot|channel)/i, "messaging"],
  [/\b(school|university|college|ε¦ζ ‘|education)/i, "education"],
  [/\b(house|home|property|rent|ζΏ)/i, "property"],
  [/\b(lawyer|legal|court|lawsuit|ζ‘|θ―)/i, "legal"],
];

/**
 * Infer a coarse topic entity for a piece of text.
 * @param {string} text - text to classify
 * @returns {string} name of the first matching bucket, or "general"
 */
function inferEntity(text) {
  const hit = ENTITY_PATTERNS.find(([pattern]) => pattern.test(text));
  return hit ? hit[1] : "general";
}
|
|
52
|
+
|
|
53
|
+
/**
 * Extract candidate fact sentences from markdown-ish text.
 * Headings, separators, code fences, quotes, tables, and short fragments
 * are skipped; surviving lines are split into sentences and stripped of
 * list markers.
 * @param {string} text - raw file content
 * @returns {string[]} cleaned sentences between 15 and 500 characters
 */
function extractFacts(text) {
  const facts = [];
  const candidateLines = text.split(/\n/).map((l) => l.trim()).filter(Boolean);
  for (const line of candidateLines) {
    // Headings and very short lines carry no standalone facts.
    if (line.startsWith("#") || line.length < 15) continue;
    // Markdown structure: rules, bold rules, fences, quotes, tables.
    if (/^(##|===|---|\*\*\*|```|>|\|)/.test(line)) continue;
    const sentences = line.split(/(?<=[γ.οΌ!οΌ?οΌ;])\s*/).filter(Boolean);
    for (const sentence of sentences) {
      // Strip leading bullet or numbered-list markers before length checks.
      const clean = sentence.replace(/^[-*β’]\s*/, "").replace(/^\d+\.\s*/, "").trim();
      if (clean.length >= 15 && clean.length <= 500) {
        facts.push(clean);
      }
    }
  }
  return facts;
}
|
|
66
|
+
|
|
67
|
+
// Collect all legacy files to scan, each tagged with its origin.
const files = [];

// Push every matching markdown file from a directory, if it exists.
const addDir = (dir, tag, keep = (f) => f.endsWith(".md")) => {
  if (!existsSync(dir)) return;
  for (const f of readdirSync(dir).filter(keep)) {
    files.push({ path: join(dir, f), tag });
  }
};

// Long-term memory file.
const memoryMd = join(WS, "MEMORY.md");
if (existsSync(memoryMd)) {
  files.push({ path: memoryMd, tag: "long-term" });
}

// Daily logs: memory/*.md (skip the .abstract marker file).
addDir(join(WS, "memory"), "daily", (f) => /\.md$/.test(f) && f !== ".abstract");

// Weekly summaries and topic notes.
addDir(join(WS, "memory", "weekly"), "weekly");
addDir(join(WS, "memory", "topics"), "topic");

if (files.length === 0) {
  console.log("\nNo legacy memory files found. Nothing to migrate.");
  process.exit(0);
}

console.log(`Found ${files.length} files to scan\n`);
|
|
104
|
+
|
|
105
|
+
let inserted = 0;
let skipped = 0;

// Split a string into its set of "significant" words (length > 2).
const wordSet = (s) => new Set(s.split(/\s+/).filter((w) => w.length > 2));

// PERF: pre-split existing records into word sets once, instead of
// re-splitting every existing record for every candidate fact (O(n*m)).
// Also guards against non-string entries in the dedup set, which used to
// crash this pass with `TypeError: ex.split is not a function`.
const existingWordSets = [...existingContent]
  .filter((c) => typeof c === "string")
  .map(wordSet);

for (const { path, tag } of files) {
  const content = readFileSync(path, "utf-8");
  const facts = extractFacts(content);
  let fileInserted = 0;

  for (const fact of facts) {
    const factLower = fact.toLowerCase();

    // Exact dedup
    if (existingContent.has(factLower)) {
      skipped++;
      continue;
    }

    // Keyword overlap dedup (>75% overlap = skip)
    const factWords = wordSet(factLower);
    let isDupe = false;
    if (factWords.size > 0) {
      for (const exWords of existingWordSets) {
        let overlap = 0;
        for (const w of factWords) {
          if (exWords.has(w)) overlap++;
        }
        if (overlap / factWords.size > 0.75) {
          isDupe = true;
          break;
        }
      }
    }
    if (isDupe) {
      skipped++;
      continue;
    }

    const record = {
      id: `arch-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
      ts: new Date().toISOString(),
      last_accessed: null,
      access_count: 0,
      importance: 5,
      content: fact,
      entity: inferEntity(fact),
      tags: [tag],
      source: "migration",
    };

    appendFileSync(ARCHIVAL, JSON.stringify(record) + "\n", "utf-8");
    existingContent.add(factLower);
    existingWordSets.push(factWords); // keep the cache in sync for later facts
    inserted++;
    fileInserted++;
  }

  if (fileInserted > 0) console.log(`  ${basename(path)}: +${fileInserted} facts`);
}

console.log(`\nβ
Migration complete: ${inserted} facts imported, ${skipped} skipped (duplicates)`);
console.log(`Total archival: ${existingContent.size} records`);
|
package/index.js
CHANGED
|
@@ -20,7 +20,7 @@ import { existsSync } from "node:fs";
|
|
|
20
20
|
import { resolveWorkspace, getCoreSizeLimit, DEFAULT_TOP_K, MAX_TOP_K } from "./lib/paths.js";
|
|
21
21
|
import { readCore, writeCore, dotGet, dotSet, autoParse } from "./lib/core.js";
|
|
22
22
|
import { loadArchival, appendRecord, rewriteArchival, archivalPath } from "./lib/archival.js";
|
|
23
|
-
import { indexEmbedding, loadEmbeddingCache, saveEmbeddingCache } from "./lib/embedding.js";
|
|
23
|
+
import { indexEmbedding, loadEmbeddingCache, saveEmbeddingCache, backfillEmbeddings } from "./lib/embedding.js";
|
|
24
24
|
import { hybridSearch } from "./lib/search.js";
|
|
25
25
|
import { consolidateText } from "./lib/consolidate.js";
|
|
26
26
|
import { findDuplicates, applyDedup } from "./lib/dedup.js";
|
|
@@ -95,6 +95,26 @@ export default definePluginEntry({
|
|
|
95
95
|
// Factory ctx has: { sessionKey, workspaceDir, agentId, ... }
|
|
96
96
|
// βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
97
97
|
|
|
98
|
+
// Background: auto-backfill missing embeddings on startup.
// Delayed so it never competes with gateway startup, and unref'd so a
// pending timer cannot keep a short-lived process alive.
const defaultWs = resolveWorkspace(null);
const backfillTimer = setTimeout(() => {
  try {
    const records = loadArchival(defaultWs);
    const cache = loadEmbeddingCache(defaultWs);
    const missing = records.filter((r) => r.id && !cache[r.id]).length;
    if (missing > 0) {
      console.error(`[memory-engine] Backfilling ${missing} missing embeddings...`);
      backfillEmbeddings(defaultWs, records, {
        onProgress: (done, total) => {
          if (done % 500 === 0) console.error(`[memory-engine] Embedding backfill: ${done}/${total}`);
        },
      }).then((result) => {
        console.error(`[memory-engine] Backfill complete: ${result.processed} embedded, ${result.errors} errors`);
      }).catch((err) => {
        // Best-effort: a failed backfill must never take down the plugin,
        // but it should be visible in logs instead of silently swallowed.
        console.error(`[memory-engine] Embedding backfill failed: ${err?.message ?? err}`);
      });
    }
  } catch { /* ignore startup errors (missing workspace, unreadable archival) */ }
}, 10000); // delay 10s after gateway start to avoid blocking
backfillTimer.unref?.(); // no-op outside Node timers
|
|
117
|
+
|
|
98
118
|
// βββ core_memory_read βββ
|
|
99
119
|
api.registerTool(withAgent((agentId) => ({
|
|
100
120
|
name: "core_memory_read",
|
package/lib/embedding.js
CHANGED
|
@@ -68,3 +68,67 @@ export async function indexEmbedding(ws, record) {
|
|
|
68
68
|
saveEmbeddingCache(ws);
|
|
69
69
|
}
|
|
70
70
|
}
|
|
71
|
+
|
|
72
|
+
/**
 * Batch-embed records that are missing from the embedding cache.
 * Runs in background with batching (default 100 per API call) and a
 * between-batch rate-limit delay.
 * @param {string} ws - workspace path
 * @param {object[]} records - all archival records
 * @param {object} [options]
 * @param {number} [options.batchSize=100]
 * @param {number} [options.delayMs=200]
 * @param {function} [options.onProgress] - callback(done, total)
 * @returns {Promise<{processed: number, errors: number, skipped: number}>}
 *   per-record counts; the three fields sum to the number of missing records
 */
export async function backfillEmbeddings(ws, records, options = {}) {
  const apiKey = resolveApiKey();
  if (!apiKey) return { processed: 0, errors: 0, skipped: 0 };

  const batchSize = options.batchSize || 100;
  const delayMs = options.delayMs || 200;
  const cache = loadEmbeddingCache(ws);

  const missing = records.filter((r) => r.id && !cache[r.id]);
  if (missing.length === 0) return { processed: 0, errors: 0, skipped: 0 };

  let processed = 0;
  let errors = 0;

  for (let i = 0; i < missing.length; i += batchSize) {
    const batch = missing.slice(i, i + batchSize);
    // Embed content plus entity/tags so topically-related records cluster.
    const texts = batch.map((r) =>
      [r.content, r.entity, ...(r.tags || [])].filter(Boolean).join(" "),
    );

    try {
      const res = await fetch("https://api.openai.com/v1/embeddings", {
        method: "POST",
        headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
        body: JSON.stringify({ input: texts, model: EMBEDDING_MODEL, dimensions: EMBEDDING_DIM }),
      });

      if (!res.ok) {
        // BUG FIX: count every record in the failed batch (the old code
        // counted one error per batch, inflating the `skipped` total).
        errors += batch.length;
      } else {
        const data = await res.json();
        for (let j = 0; j < batch.length; j++) {
          if (data.data?.[j]?.embedding) {
            cache[batch[j].id] = data.data[j].embedding;
            processed++;
          }
        }
        // Persist after each batch so progress survives a crash.
        saveEmbeddingCache(ws);
        if (options.onProgress) options.onProgress(processed, missing.length);
      }
    } catch {
      errors += batch.length;
    }

    // Rate limit between batches. BUG FIX: the delay used to live inside the
    // try block and was skipped on every error path, so a failing endpoint
    // was retried without any pause.
    if (i + batchSize < missing.length) {
      await new Promise((r) => setTimeout(r, delayMs));
    }
  }

  return { processed, errors, skipped: missing.length - processed - errors };
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@icex-labs/openclaw-memory-engine",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.5.0",
|
|
4
4
|
"description": "MemGPT-style hierarchical memory plugin for OpenClaw β core memory block + archival storage with semantic search",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
package/setup.sh
CHANGED
|
@@ -106,6 +106,40 @@ else
|
|
|
106
106
|
echo "βοΈ archival.jsonl already exists ($lines records)"
|
|
107
107
|
fi
|
|
108
108
|
|
|
109
|
+
# --- 3b. Migrate legacy memory files into archival ---
if command -v node &>/dev/null && [ -f "$PLUGIN_DIR/extras/migrate-legacy.mjs" ]; then
  # Check if there are legacy files to migrate
  legacy_count=0
  [ -f "$WORKSPACE/MEMORY.md" ] && legacy_count=$((legacy_count + 1))
  legacy_count=$((legacy_count + $(ls "$MEMORY_DIR"/*.md 2>/dev/null | wc -l | tr -d ' ')))
  legacy_count=$((legacy_count + $(ls "$MEMORY_DIR"/weekly/*.md 2>/dev/null | wc -l | tr -d ' ')))
  legacy_count=$((legacy_count + $(ls "$MEMORY_DIR"/topics/*.md 2>/dev/null | wc -l | tr -d ' ')))

  # BUG FIX: with `wc -l < file | tr ... || echo "0"` the fallback never runs
  # when the file is missing, because the pipeline's exit status comes from
  # `tr` (which succeeds). That left archival_count empty and broke the
  # numeric -lt test below. Default the empty string explicitly instead.
  archival_count=$(wc -l < "$MEMORY_DIR/archival.jsonl" 2>/dev/null | tr -d ' ')
  archival_count=${archival_count:-0}

  if [ "$legacy_count" -gt 0 ] && [ "$archival_count" -lt 10 ]; then
    echo ""
    echo "π¦ Found $legacy_count legacy memory files (MEMORY.md, daily logs, weekly summaries, topics)."
    if $NON_INTERACTIVE; then
      echo "   Migrating automatically..."
      node "$PLUGIN_DIR/extras/migrate-legacy.mjs" "$WORKSPACE" 2>&1 | tail -3
    else
      printf "   Migrate into archival memory? [Y/n]: "
      read -r migrate_answer
      if [ "${migrate_answer:-Y}" != "n" ] && [ "${migrate_answer:-Y}" != "N" ]; then
        node "$PLUGIN_DIR/extras/migrate-legacy.mjs" "$WORKSPACE" 2>&1 | tail -5
      else
        echo "βοΈ Skipping migration. Run manually later: node $PLUGIN_DIR/extras/migrate-legacy.mjs $WORKSPACE"
      fi
    fi
    echo ""
  else
    if [ "$archival_count" -gt 10 ]; then
      echo "βοΈ Archival already has $archival_count records, skipping migration"
    fi
  fi
fi
|
|
142
|
+
|
|
109
143
|
# --- 4. Install memory-maintenance.sh ---
|
|
110
144
|
SCRIPTS_DIR="$WORKSPACE/scripts"
|
|
111
145
|
mkdir -p "$SCRIPTS_DIR"
|