@icex-labs/openclaw-memory-engine 3.4.0 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +21 -1
- package/lib/embedding.js +64 -0
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -20,7 +20,7 @@ import { existsSync } from "node:fs";
|
|
|
20
20
|
import { resolveWorkspace, getCoreSizeLimit, DEFAULT_TOP_K, MAX_TOP_K } from "./lib/paths.js";
|
|
21
21
|
import { readCore, writeCore, dotGet, dotSet, autoParse } from "./lib/core.js";
|
|
22
22
|
import { loadArchival, appendRecord, rewriteArchival, archivalPath } from "./lib/archival.js";
|
|
23
|
-
import { indexEmbedding, loadEmbeddingCache, saveEmbeddingCache } from "./lib/embedding.js";
|
|
23
|
+
import { indexEmbedding, loadEmbeddingCache, saveEmbeddingCache, backfillEmbeddings } from "./lib/embedding.js";
|
|
24
24
|
import { hybridSearch } from "./lib/search.js";
|
|
25
25
|
import { consolidateText } from "./lib/consolidate.js";
|
|
26
26
|
import { findDuplicates, applyDedup } from "./lib/dedup.js";
|
|
@@ -95,6 +95,26 @@ export default definePluginEntry({
|
|
|
95
95
|
// Factory ctx has: { sessionKey, workspaceDir, agentId, ... }
|
|
96
96
|
// ═══════════════════════════════════════════════════════════════════
|
|
97
97
|
|
|
98
|
+
// Background: auto-backfill missing embeddings on startup
|
|
99
|
+
const defaultWs = resolveWorkspace(null);
|
|
100
|
+
setTimeout(() => {
|
|
101
|
+
try {
|
|
102
|
+
const records = loadArchival(defaultWs);
|
|
103
|
+
const cache = loadEmbeddingCache(defaultWs);
|
|
104
|
+
const missing = records.filter((r) => r.id && !cache[r.id]).length;
|
|
105
|
+
if (missing > 0) {
|
|
106
|
+
console.error(`[memory-engine] Backfilling ${missing} missing embeddings...`);
|
|
107
|
+
backfillEmbeddings(defaultWs, records, {
|
|
108
|
+
onProgress: (done, total) => {
|
|
109
|
+
if (done % 500 === 0) console.error(`[memory-engine] Embedding backfill: ${done}/${total}`);
|
|
110
|
+
},
|
|
111
|
+
}).then((result) => {
|
|
112
|
+
console.error(`[memory-engine] Backfill complete: ${result.processed} embedded, ${result.errors} errors`);
|
|
113
|
+
}).catch(() => {});
|
|
114
|
+
}
|
|
115
|
+
} catch { /* ignore startup errors */ }
|
|
116
|
+
}, 10000); // delay 10s after gateway start to avoid blocking
|
|
117
|
+
|
|
98
118
|
// ─── core_memory_read ───
|
|
99
119
|
api.registerTool(withAgent((agentId) => ({
|
|
100
120
|
name: "core_memory_read",
|
package/lib/embedding.js
CHANGED
|
@@ -68,3 +68,67 @@ export async function indexEmbedding(ws, record) {
|
|
|
68
68
|
saveEmbeddingCache(ws);
|
|
69
69
|
}
|
|
70
70
|
}
|
|
71
|

/**
 * Batch-embed archival records that are missing from the embedding cache.
 *
 * Runs in the background with batching (default 100 texts per API call) and
 * a fixed inter-batch delay for rate limiting. The cache is saved after each
 * successful batch so progress survives a crash or restart.
 *
 * @param {string} ws - workspace path
 * @param {object[]} records - all archival records
 * @param {object} [options]
 * @param {number} [options.batchSize=100] - records per embeddings API call
 * @param {number} [options.delayMs=200] - pause between batches, in ms
 * @param {function} [options.onProgress] - callback(done, total) after every batch
 * @returns {Promise<{processed: number, errors: number, skipped: number}>}
 *   All counts are per-record: `processed` embedded OK, `errors` failed
 *   (the whole batch on an HTTP or network failure), and
 *   `skipped` = missing − processed − errors (records the API returned no
 *   embedding for despite a 2xx response).
 */
export async function backfillEmbeddings(ws, records, options = {}) {
  const apiKey = resolveApiKey();
  if (!apiKey) return { processed: 0, errors: 0, skipped: 0 };

  const batchSize = options.batchSize || 100;
  const delayMs = options.delayMs || 200;
  const cache = loadEmbeddingCache(ws);

  // Only records that have an id and no cached vector need work.
  const missing = records.filter((r) => r.id && !cache[r.id]);
  if (missing.length === 0) return { processed: 0, errors: 0, skipped: 0 };

  let processed = 0;
  let errors = 0;

  for (let i = 0; i < missing.length; i += batchSize) {
    const batch = missing.slice(i, i + batchSize);
    const texts = batch.map((r) =>
      [r.content, r.entity, ...(r.tags || [])].filter(Boolean).join(" "),
    );

    try {
      const res = await fetch("https://api.openai.com/v1/embeddings", {
        method: "POST",
        headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
        body: JSON.stringify({ input: texts, model: EMBEDDING_MODEL, dimensions: EMBEDDING_DIM }),
      });

      if (!res.ok) {
        // Fix: count every record in the failed batch, not 1 per batch.
        // The original `errors++` mixed batch counts into the record-level
        // totals, making the returned `skipped` figure wrong.
        errors += batch.length;
      } else {
        // The embeddings API returns vectors in request order, so index j
        // of the response corresponds to batch[j].
        const data = await res.json();
        for (let j = 0; j < batch.length; j++) {
          if (data.data?.[j]?.embedding) {
            cache[batch[j].id] = data.data[j].embedding;
            processed++;
          }
        }
        // Persist after each batch so a crash loses at most one batch.
        saveEmbeddingCache(ws);
      }
    } catch {
      // Network / JSON-parse failure: the whole batch failed.
      errors += batch.length;
    }

    // Report progress on every batch, including failed ones (the original
    // only reported on success, so progress stalled during outages).
    if (options.onProgress) options.onProgress(processed, missing.length);

    // Rate limit — applied on failure paths too; the original skipped the
    // delay when a batch threw or returned !ok, hammering the API.
    if (i + batchSize < missing.length) {
      await new Promise((r) => setTimeout(r, delayMs));
    }
  }

  return { processed, errors, skipped: missing.length - processed - errors };
}
|
package/package.json
CHANGED