@icex-labs/openclaw-memory-engine 3.3.2 β†’ 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,165 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * migrate-legacy.mjs β€” Import existing file-based memory into archival.jsonl
4
+ *
5
+ * Scans workspace for: MEMORY.md, memory/*.md, memory/weekly/*.md, memory/topics/*.md
6
+ * Extracts facts, deduplicates, and appends to memory/archival.jsonl.
7
+ *
8
+ * Usage: node migrate-legacy.mjs [workspace_path]
9
+ */
10
+
11
import { readFileSync, appendFileSync, existsSync, readdirSync } from "node:fs";
import { join, basename } from "node:path";

// Workspace resolution order: CLI argument > env var > ~/.openclaw/workspace.
const WS =
  process.argv[2] ||
  process.env.OPENCLAW_WORKSPACE ||
  join(process.env.HOME || "/tmp", ".openclaw", "workspace");
const ARCHIVAL = join(WS, "memory", "archival.jsonl");

console.log(`🧠 Legacy memory migration`);
console.log(`  Workspace: ${WS}`);
console.log(`  Archival: ${ARCHIVAL}`);
console.log(``);

// Seed the dedup set with the lowercased `content` of every record already
// present in archival.jsonl, so re-running the migration is idempotent.
const existingContent = new Set();
if (existsSync(ARCHIVAL)) {
  const lines = readFileSync(ARCHIVAL, "utf-8").trim().split("\n").filter(Boolean);
  for (const line of lines) {
    try {
      existingContent.add(JSON.parse(line).content?.toLowerCase());
    } catch {
      // Malformed JSONL lines are ignored; they simply don't participate in dedup.
    }
  }
}
console.log(`Existing archival: ${existingContent.size} records`);
30
+
31
// Generic entity inference (no personal data).
//
// BUGFIX: `\b` only matches at ASCII word-character boundaries, so CJK
// keywords are kept OUTSIDE the \b-anchored group β€” `\bζŠ•θ΅„` could never
// match because CJK characters are not `\w` in a non-unicode JS regex.
// Short acronyms (NAV, PR, CI, CD) now carry a trailing \b so they don't
// fire inside words like "navigate", "project", or "city".
// Order matters: the FIRST matching pattern wins (e.g. "portfolio" is
// claimed by "finance" before "trading" is ever tried).
const ENTITY_PATTERNS = [
  [/\b(IBKR|Interactive Brokers|NAV\b|portfolio|HELOC|mortgage|finance)|ζŠ•θ΅„/i, "finance"],
  [/\b(immigration|PR\b|IRCC|CBSA|visa|lawyer|petition)|εΎ‹εΈˆ/i, "immigration"],
  [/\b(quant|trading|backtest|signal|portfolio|Sharpe)/i, "trading"],
  [/\b(doctor|hospital|health|medication|clinic)|εŒ»η”Ÿ|药|体检/i, "health"],
  [/\b(car|vehicle|SUV|sedan|truck)\b/i, "vehicles"],
  [/\b(k3d|ArgoCD|Helm|kubectl|GitOps|cluster|deploy|CI\b|CD\b)/i, "infrastructure"],
  [/\b(OpenClaw|gateway|plugin|session|agent|memory|compaction)/i, "openclaw"],
  [/\b(Discord|Telegram|Slack|bot|channel)/i, "messaging"],
  [/\b(school|university|college|education)|ε­¦ζ ‘/i, "education"],
  [/\b(house|home|property|rent)|房/i, "property"],
  [/\b(lawyer|legal|court|lawsuit)|摈|θ―‰/i, "legal"],
];

/**
 * Classify a fact string into a coarse entity bucket.
 * @param {string} text - fact text (may mix English and Chinese)
 * @returns {string} name of the first matching pattern, or "general"
 */
function inferEntity(text) {
  for (const [pattern, entity] of ENTITY_PATTERNS) {
    if (pattern.test(text)) return entity;
  }
  return "general";
}
52
+
53
/**
 * Pull candidate fact sentences out of a markdown document.
 *
 * Skips headings, separators, code fences, block quotes, tables and lines
 * shorter than 15 chars; splits the rest on CJK/ASCII sentence terminators;
 * strips bullet / numbered-list prefixes; keeps sentences of 15-500 chars.
 * @param {string} text - raw markdown content
 * @returns {string[]} cleaned fact sentences, in document order
 */
function extractFacts(text) {
  const stripListPrefix = (s) =>
    s.replace(/^[-*β€’]\s*/, "").replace(/^\d+\.\s*/, "").trim();
  const isNoise = (line) =>
    line.startsWith("#") ||
    line.length < 15 ||
    /^(##|===|---|\*\*\*|```|>|\|)/.test(line);

  return text
    .split(/\n/)
    .map((l) => l.trim())
    .filter(Boolean)
    .filter((line) => !isNoise(line))
    .flatMap((line) => line.split(/(?<=[。.!!??οΌ›;])\s*/).filter(Boolean))
    .map(stripListPrefix)
    .filter((clean) => clean.length >= 15 && clean.length <= 500);
}
66
+
67
// Gather every legacy memory file along with a provenance tag.
const files = [];

// Top-level MEMORY.md holds curated long-term notes.
const memoryMd = join(WS, "MEMORY.md");
if (existsSync(memoryMd)) files.push({ path: memoryMd, tag: "long-term" });

// Helper: queue every .md file directly inside `dir` (if it exists) under `tag`.
// `keep` allows an extra per-file filter on top of the .md extension check.
const collectDir = (dir, tag, keep = () => true) => {
  if (!existsSync(dir)) return;
  for (const f of readdirSync(dir)) {
    if (f.endsWith(".md") && keep(f)) files.push({ path: join(dir, f), tag });
  }
};

const memDir = join(WS, "memory");
collectDir(memDir, "daily", (f) => f !== ".abstract"); // daily logs
collectDir(join(WS, "memory", "weekly"), "weekly");    // weekly summaries
collectDir(join(WS, "memory", "topics"), "topic");     // per-topic notes
97
+
98
if (files.length === 0) {
  console.log("\nNo legacy memory files found. Nothing to migrate.");
  process.exit(0);
}

console.log(`Found ${files.length} files to scan\n`);

let inserted = 0;
let skipped = 0;

// PERF: cache of content -> significant-word Set. The fuzzy-dedup pass below
// previously rebuilt the word Set of EVERY existing record for EVERY candidate
// fact (O(facts Γ— records Γ— words)); with the cache each string is tokenized
// exactly once. Comparison semantics are unchanged.
const wordSetCache = new Map();
function wordsOf(text) {
  let words = wordSetCache.get(text);
  if (!words) {
    words = new Set(text.split(/\s+/).filter((w) => w.length > 2));
    wordSetCache.set(text, words);
  }
  return words;
}

for (const { path, tag } of files) {
  const content = readFileSync(path, "utf-8");
  const facts = extractFacts(content);
  let fileInserted = 0;

  for (const fact of facts) {
    const factLower = fact.toLowerCase();

    // Exact dedup against archival records plus facts inserted earlier this run.
    if (existingContent.has(factLower)) {
      skipped++;
      continue;
    }

    // Fuzzy dedup: skip when >75% of this fact's significant words (length > 2)
    // already appear in a single existing record.
    let isDupe = false;
    const factWords = wordsOf(factLower);
    if (factWords.size > 0) {
      for (const ex of existingContent) {
        const exWords = wordsOf(ex);
        let overlap = 0;
        for (const w of factWords) {
          if (exWords.has(w)) overlap++;
        }
        if (overlap / factWords.size > 0.75) {
          isDupe = true;
          break;
        }
      }
    }
    if (isDupe) {
      skipped++;
      continue;
    }

    // Record shape mirrors what the plugin's archival store writes at runtime.
    const record = {
      id: `arch-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
      ts: new Date().toISOString(),
      last_accessed: null,
      access_count: 0,
      importance: 5, // migrated facts start at neutral importance
      content: fact,
      entity: inferEntity(fact),
      tags: [tag],
      source: "migration",
    };

    appendFileSync(ARCHIVAL, JSON.stringify(record) + "\n", "utf-8");
    existingContent.add(factLower); // so later files dedup against this insert
    inserted++;
    fileInserted++;
  }

  if (fileInserted > 0) console.log(`  ${basename(path)}: +${fileInserted} facts`);
}

console.log(`\nβœ… Migration complete: ${inserted} facts imported, ${skipped} skipped (duplicates)`);
console.log(`Total archival: ${existingContent.size} records`);
package/index.js CHANGED
@@ -20,7 +20,7 @@ import { existsSync } from "node:fs";
20
20
  import { resolveWorkspace, getCoreSizeLimit, DEFAULT_TOP_K, MAX_TOP_K } from "./lib/paths.js";
21
21
  import { readCore, writeCore, dotGet, dotSet, autoParse } from "./lib/core.js";
22
22
  import { loadArchival, appendRecord, rewriteArchival, archivalPath } from "./lib/archival.js";
23
- import { indexEmbedding, loadEmbeddingCache, saveEmbeddingCache } from "./lib/embedding.js";
23
+ import { indexEmbedding, loadEmbeddingCache, saveEmbeddingCache, backfillEmbeddings } from "./lib/embedding.js";
24
24
  import { hybridSearch } from "./lib/search.js";
25
25
  import { consolidateText } from "./lib/consolidate.js";
26
26
  import { findDuplicates, applyDedup } from "./lib/dedup.js";
@@ -95,6 +95,26 @@ export default definePluginEntry({
95
95
  // Factory ctx has: { sessionKey, workspaceDir, agentId, ... }
96
96
  // ═══════════════════════════════════════════════════════════════════
97
97
 
98
+ // Background: auto-backfill missing embeddings on startup
99
+ const defaultWs = resolveWorkspace(null);
100
+ setTimeout(() => {
101
+ try {
102
+ const records = loadArchival(defaultWs);
103
+ const cache = loadEmbeddingCache(defaultWs);
104
+ const missing = records.filter((r) => r.id && !cache[r.id]).length;
105
+ if (missing > 0) {
106
+ console.error(`[memory-engine] Backfilling ${missing} missing embeddings...`);
107
+ backfillEmbeddings(defaultWs, records, {
108
+ onProgress: (done, total) => {
109
+ if (done % 500 === 0) console.error(`[memory-engine] Embedding backfill: ${done}/${total}`);
110
+ },
111
+ }).then((result) => {
112
+ console.error(`[memory-engine] Backfill complete: ${result.processed} embedded, ${result.errors} errors`);
113
+ }).catch(() => {});
114
+ }
115
+ } catch { /* ignore startup errors */ }
116
+ }, 10000); // delay 10s after gateway start to avoid blocking
117
+
98
118
  // ─── core_memory_read ───
99
119
  api.registerTool(withAgent((agentId) => ({
100
120
  name: "core_memory_read",
package/lib/embedding.js CHANGED
@@ -68,3 +68,67 @@ export async function indexEmbedding(ws, record) {
68
68
  saveEmbeddingCache(ws);
69
69
  }
70
70
  }
71
+
72
/**
 * Batch-embed records that are missing from cache.
 * Runs in background with batching (100 per API call) and rate limiting.
 * @param {string} ws - workspace path
 * @param {object[]} records - all archival records
 * @param {object} [options]
 * @param {number} [options.batchSize=100]
 * @param {number} [options.delayMs=200]
 * @param {function} [options.onProgress] - callback(done, total), invoked once per batch
 * @returns {Promise<{processed: number, errors: number, skipped: number}>}
 *   All three counters are in RECORDS and sum to the number that were missing:
 *   `errors` covers every record of a failed batch; `skipped` covers records
 *   whose batch succeeded but whose embedding was absent from the response.
 */
export async function backfillEmbeddings(ws, records, options = {}) {
  const apiKey = resolveApiKey();
  if (!apiKey) return { processed: 0, errors: 0, skipped: 0 };

  const batchSize = options.batchSize || 100;
  const delayMs = options.delayMs || 200;
  const cache = loadEmbeddingCache(ws);

  const missing = records.filter((r) => r.id && !cache[r.id]);
  if (missing.length === 0) return { processed: 0, errors: 0, skipped: 0 };

  let processed = 0;
  let errors = 0;

  for (let i = 0; i < missing.length; i += batchSize) {
    const batch = missing.slice(i, i + batchSize);
    // Embedding input combines content, entity, and tags for richer recall.
    const texts = batch.map((r) =>
      [r.content, r.entity, ...(r.tags || [])].filter(Boolean).join(" "),
    );

    try {
      const res = await fetch("https://api.openai.com/v1/embeddings", {
        method: "POST",
        headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
        body: JSON.stringify({ input: texts, model: EMBEDDING_MODEL, dimensions: EMBEDDING_DIM }),
      });

      if (!res.ok) {
        // Whole batch failed (e.g. 429/5xx): count every record as errored.
        errors += batch.length;
      } else {
        const data = await res.json();
        for (let j = 0; j < batch.length; j++) {
          if (data.data?.[j]?.embedding) {
            cache[batch[j].id] = data.data[j].embedding;
            processed++;
          }
        }
        // Persist after each batch so an interruption loses at most one batch.
        saveEmbeddingCache(ws);
      }
    } catch {
      // Network/parse failure: count the batch as errored and keep going.
      errors += batch.length;
    }

    if (options.onProgress) options.onProgress(processed, missing.length);

    // Rate limit between batches. BUGFIX: previously `continue` on !res.ok
    // skipped this delay, hammering the API exactly when it was throttling us.
    if (i + batchSize < missing.length) {
      await new Promise((r) => setTimeout(r, delayMs));
    }
  }

  return { processed, errors, skipped: missing.length - processed - errors };
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@icex-labs/openclaw-memory-engine",
3
- "version": "3.3.2",
3
+ "version": "3.5.0",
4
4
  "description": "MemGPT-style hierarchical memory plugin for OpenClaw β€” core memory block + archival storage with semantic search",
5
5
  "type": "module",
6
6
  "main": "index.js",
package/setup.sh CHANGED
@@ -106,6 +106,40 @@ else
106
106
  echo "⏭️ archival.jsonl already exists ($lines records)"
107
107
  fi
108
108
 
109
# --- 3b. Migrate legacy memory files into archival ---
# One-time import of MEMORY.md / daily / weekly / topic markdown files into
# archival.jsonl. Only offered while archival is still near-empty (<10 records)
# so a populated store is never polluted by a re-run.
if command -v node &>/dev/null && [ -f "$PLUGIN_DIR/extras/migrate-legacy.mjs" ]; then
  # Count candidate legacy files. The glob is quoted up to the slash so *.md
  # still expands; ls errors are silenced when nothing matches (count = 0).
  legacy_count=0
  [ -f "$WORKSPACE/MEMORY.md" ] && legacy_count=$((legacy_count + 1))
  legacy_count=$((legacy_count + $(ls "$MEMORY_DIR"/*.md 2>/dev/null | wc -l | tr -d ' ')))
  legacy_count=$((legacy_count + $(ls "$MEMORY_DIR"/weekly/*.md 2>/dev/null | wc -l | tr -d ' ')))
  legacy_count=$((legacy_count + $(ls "$MEMORY_DIR"/topics/*.md 2>/dev/null | wc -l | tr -d ' ')))

  # BUGFIX: `|| echo "0"` used to bind to `tr` (which succeeds even when wc
  # fails), so archival_count was EMPTY when archival.jsonl is missing and the
  # `-lt` test below errored out. Default explicitly instead.
  archival_count=$(wc -l < "$MEMORY_DIR/archival.jsonl" 2>/dev/null | tr -d ' ')
  archival_count=${archival_count:-0}

  if [ "$legacy_count" -gt 0 ] && [ "$archival_count" -lt 10 ]; then
    echo ""
    echo "πŸ“¦ Found $legacy_count legacy memory files (MEMORY.md, daily logs, weekly summaries, topics)."
    if $NON_INTERACTIVE; then
      echo "   Migrating automatically..."
      node "$PLUGIN_DIR/extras/migrate-legacy.mjs" "$WORKSPACE" 2>&1 | tail -3
    else
      printf "   Migrate into archival memory? [Y/n]: "
      read -r migrate_answer
      # Anything except an explicit n/N (including plain Enter) means yes.
      if [ "${migrate_answer:-Y}" != "n" ] && [ "${migrate_answer:-Y}" != "N" ]; then
        node "$PLUGIN_DIR/extras/migrate-legacy.mjs" "$WORKSPACE" 2>&1 | tail -5
      else
        echo "⏭️  Skipping migration. Run manually later: node $PLUGIN_DIR/extras/migrate-legacy.mjs $WORKSPACE"
      fi
    fi
    echo ""
  else
    if [ "$archival_count" -gt 10 ]; then
      echo "⏭️  Archival already has $archival_count records, skipping migration"
    fi
  fi
fi
142
+
109
143
  # --- 4. Install memory-maintenance.sh ---
110
144
  SCRIPTS_DIR="$WORKSPACE/scripts"
111
145
  mkdir -p "$SCRIPTS_DIR"