openclaw-memory-alibaba-local 1.0.13 → 1.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/db.ts +55 -0
- package/index.ts +282 -42
- package/package.json +1 -1
- package/prompts.ts +22 -7
package/db.ts
CHANGED
|
@@ -1201,6 +1201,61 @@ export class MemoryDB {
|
|
|
1201
1201
|
return rows.length > 0;
|
|
1202
1202
|
}
|
|
1203
1203
|
|
|
1204
|
+
/**
 * Reports whether this agent already stores, inside the given session, a user-memory row
 * that scores at least `minScore` against any of the query embeddings.
 * Intended to suppress near-duplicate inserts when the userImage LLM answers `insert`
 * where `skip`/`update` would have been appropriate.
 *
 * @param agentId - Agent whose memories are searched.
 * @param sessionId - Session to match; both sides are compared after `normSessionId`.
 * @param vectors - Query embeddings; an empty list short-circuits to `false`.
 * @param minScore - Score a hit must reach to count as a near-duplicate.
 * @returns `true` when at least one same-session hit reaches `minScore`.
 */
async hasHighlySimilarUserMemoryInSession(
  agentId: string,
  sessionId: string,
  vectors: number[][],
  minScore: number,
): Promise<boolean> {
  if (vectors.length === 0) {
    return false;
  }
  await this.ensureInitialized();
  await this.refreshToLatest();
  // NOTE(review): 28 is presumably the result cap of searchMerged, applied before the
  // session filter below — confirm it cannot crowd out same-session rows.
  const candidates = await this.searchMerged(agentId, vectors, 28, minScore, [...USER_MEMORY_CATEGORIES]);
  const wantedSession = normSessionId(sessionId);
  return candidates.some(
    (candidate) => normSessionId(candidate.entry.sessionId) === wantedSession && candidate.score >= minScore,
  );
}
|
|
1231
|
+
|
|
1232
|
+
/**
 * World-fact counterpart of {@link hasHighlySimilarUserMemoryInSession}: reports whether the
 * given session already holds a `world_fact` row scoring at least `minScore` against any of
 * the query embeddings (session-local dedup when the same content is sent again).
 *
 * @param agentId - Agent whose memories are searched.
 * @param sessionId - Session to match; both sides are compared after `normSessionId`.
 * @param vectors - Query embeddings; an empty list short-circuits to `false`.
 * @param minScore - Score a hit must reach to count as a near-duplicate.
 * @returns `true` when at least one same-session hit reaches `minScore`.
 */
async hasHighlySimilarWorldFactInSession(
  agentId: string,
  sessionId: string,
  vectors: number[][],
  minScore: number,
): Promise<boolean> {
  if (vectors.length === 0) {
    return false;
  }
  await this.ensureInitialized();
  await this.refreshToLatest();
  // NOTE(review): 28 is presumably the result cap of searchMerged, applied before the
  // session filter below — confirm it cannot crowd out same-session rows.
  const candidates = await this.searchMerged(agentId, vectors, 28, minScore, [WORLD_FACT]);
  const wantedSession = normSessionId(sessionId);
  return candidates.some(
    (candidate) => normSessionId(candidate.entry.sessionId) === wantedSession && candidate.score >= minScore,
  );
}
|
|
1258
|
+
|
|
1204
1259
|
/**
|
|
1205
1260
|
* Vector search with multiple query embeddings; merge by category + text (chunk 行共享同一逻辑正文), keep max score.
|
|
1206
1261
|
*/
|
package/index.ts
CHANGED
|
@@ -181,13 +181,6 @@ function formatRelevantMemoriesContext(
|
|
|
181
181
|
].join("\n");
|
|
182
182
|
}
|
|
183
183
|
|
|
184
|
-
function getThresholdForCategory(cfg: MemoryConfig, category: MemoryCategory): number {
|
|
185
|
-
if (isUserMemoryCategory(category) || isFullContextSourceCategory(category) || category === FULL_CONTEXT_MEMORY) {
|
|
186
|
-
return cfg.similarityThresholdUserMemory;
|
|
187
|
-
}
|
|
188
|
-
return cfg.similarityThresholdSelfImproving;
|
|
189
|
-
}
|
|
190
|
-
|
|
191
184
|
/** 精简日志:仅记录 tag + prompt 字符数,不贴原文。 */
|
|
192
185
|
function logLlmCall(tag: string, promptChars: number): void {
|
|
193
186
|
console.debug(`[openclaw-memory-alibaba-local] llm ${tag} prompt (${promptChars} chars)`);
|
|
@@ -532,8 +525,8 @@ async function extractUserMemoriesWithLLM(
|
|
|
532
525
|
for (const item of list) {
|
|
533
526
|
const text = typeof item.text === "string" ? item.text.trim() : "";
|
|
534
527
|
if (text.length >= 10 && text.length <= 2000) {
|
|
535
|
-
// importance: 0.7 if text mentions User
|
|
536
|
-
const importance = /\bUser\b
|
|
528
|
+
// importance: 0.7 if text mentions User / 用户, 0.5 otherwise
|
|
529
|
+
const importance = /\bUser\b|用户/.test(text) ? 0.7 : 0.5;
|
|
537
530
|
out.push({ category: USER_MEMORY_FACT, text, importance });
|
|
538
531
|
}
|
|
539
532
|
}
|
|
@@ -1214,6 +1207,198 @@ async function runAgentEndCapture(
|
|
|
1214
1207
|
saveAgentEndCursorMap(lancedbDir, map);
|
|
1215
1208
|
}
|
|
1216
1209
|
|
|
1210
|
+
/**
 * Removes every leading square-bracketed tag (for example `[YYYY-MM-DD]` or `[as of …]`)
 * from a unified extraction line and returns the trimmed remainder.
 * Any leading `[...]` group is removed, whatever it contains.
 */
function stripExtractionDatePrefixes(text: string): string {
  // One or more `[...]` groups, each optionally followed by whitespace, anchored at the start.
  const leadingTags = /^(?:\[[^\]]*\]\s*)+/u;
  return text.trim().replace(leadingTags, "").trim();
}
|
|
1219
|
+
|
|
1220
|
+
/**
 * Builds the comparison key used to decide whether a world item and a user item from the
 * same `agent_end` batch state the same fact: leading bracket tags are stripped, the subject
 * tokens `User` (whole word, any letter case) and `用户` are removed, all whitespace is
 * deleted, and the result is lower-cased.
 */
function normalizeCoreForPersonalWorldDedup(text: string): string {
  const untagged = stripExtractionDatePrefixes(text);
  const withoutSubject = untagged.replace(/\bUser\b/gi, "").replace(/用户/g, "");
  const compact = withoutSubject.replace(/\s+/g, "");
  return compact.toLowerCase();
}
|
|
1228
|
+
|
|
1229
|
+
/**
 * True if two extractions likely duplicate the same fact across personal vs world routing.
 *
 * Both texts are reduced with `normalizeCoreForPersonalWorldDedup`. Cores shorter than 12
 * characters never match. Otherwise the texts overlap when one core contains the other, or
 * when the overlap coefficient of their distinct-character sets
 * (`|A ∩ B| / min(|A|, |B|)`) exceeds 0.55.
 *
 * The coefficient is symmetric in its arguments and bounded by 1, so a long world fact is not
 * dropped merely because it repeats a few characters of a short user item.
 * NOTE(review): character-level overlap is a coarse signal for alphabetic scripts, where most
 * sentences share most letters — consider token-level comparison for English text.
 *
 * @param a - First extraction text.
 * @param b - Second extraction text.
 * @returns Whether the two texts are considered the same fact.
 */
function personalWorldCoreOverlaps(a: string, b: string): boolean {
  const MIN_CORE_LEN = 12;
  const OVERLAP_THRESHOLD = 0.55;

  const coreA = normalizeCoreForPersonalWorldDedup(a);
  const coreB = normalizeCoreForPersonalWorldDedup(b);
  if (coreA.length < MIN_CORE_LEN || coreB.length < MIN_CORE_LEN) {
    return false;
  }
  if (coreA.includes(coreB) || coreB.includes(coreA)) {
    return true;
  }

  // Compare distinct characters only, so repeated characters cannot inflate the score.
  const charsA = new Set(coreA);
  const charsB = new Set(coreB);
  let shared = 0;
  for (const ch of charsA) {
    if (charsB.has(ch)) shared++;
  }
  return shared / Math.min(charsA.size, charsB.size) > OVERLAP_THRESHOLD;
}
|
|
1251
|
+
|
|
1252
|
+
/**
 * Same-turn / same-extractor-call dedup: drops lines whose normalized core is contained in
 * another line's core, keeping the line that carries more information, before the user/world
 * image LLM runs.
 *
 * Rules:
 * - Items whose core is shorter than `minCoreLen` are never merged and are kept as-is.
 * - An item is dropped when an already-kept comparable item's core contains its core.
 * - An item whose core contains the cores of already-kept items replaces ALL of them and
 *   takes the position of the first one it replaces.
 * - When two cores are identical, the item with the longer raw text wins (the later item
 *   wins a tie).
 *
 * @param items - Extraction items in extractor order.
 * @param minCoreLen - Minimum normalized-core length for an item to take part in merging.
 * @returns The surviving items, in their original relative order.
 */
function dedupeExtractionBatchBySubstringContainment(
  items: LLMExtractionItem[],
  minCoreLen: number,
): LLMExtractionItem[] {
  // Each kept item is stored with its core so it is normalized only once.
  const kept: { item: LLMExtractionItem; core: string }[] = [];

  for (const item of items) {
    const core = normalizeCoreForPersonalWorldDedup(item.text);
    if (core.length < minCoreLen) {
      kept.push({ item, core });
      continue;
    }

    const coveredByExisting = kept.some(
      (prev) =>
        prev.core.length >= minCoreLen &&
        prev.core.includes(core) &&
        (prev.core !== core || prev.item.text.length > item.text.length),
    );
    if (coveredByExisting) {
      continue;
    }

    // Remove every kept entry this item supersedes; walk backwards so indices stay valid.
    let slot = -1;
    for (let i = kept.length - 1; i >= 0; i--) {
      const prev = kept[i]!;
      if (prev.core.length >= minCoreLen && core.includes(prev.core)) {
        kept.splice(i, 1);
        slot = i;
      }
    }
    if (slot === -1) {
      kept.push({ item, core });
    } else {
      kept.splice(slot, 0, { item, core });
    }
  }

  return kept.map((entry) => entry.item);
}
|
|
1286
|
+
|
|
1287
|
+
/**
 * Removes event (world-route) items that `personalWorldCoreOverlaps` judges to restate a fact
 * already present among the user-route items of the same batch.
 * When either list is empty the original `eventItems` array is returned untouched.
 */
function filterWorldItemsAgainstUserItems(
  eventItems: LLMExtractionItem[],
  userItems: LLMExtractionItem[],
): LLMExtractionItem[] {
  if (eventItems.length === 0 || userItems.length === 0) {
    return eventItems;
  }
  const survivors: LLMExtractionItem[] = [];
  for (const eventItem of eventItems) {
    let restatesUserFact = false;
    for (const userItem of userItems) {
      if (personalWorldCoreOverlaps(eventItem.text, userItem.text)) {
        restatesUserFact = true;
        break;
      }
    }
    if (!restatesUserFact) {
      survivors.push(eventItem);
    }
  }
  return survivors;
}
|
|
1298
|
+
|
|
1299
|
+
/**
 * Hard-drop filter for lines the iteration guide expects to yield nothing: bare greetings,
 * generic trivia, and one-shot task requests. Complements the LLM prompt and blocks mistaken
 * `world_fact` rows such as generic astronomy facts.
 *
 * Evaluation order:
 * 1. Text that is empty after removing leading bracket tags is noise.
 * 2. A bare greeting (`你好`, `您好`, `hi`, `hello`, `hey`) followed only by sentence
 *    punctuation or whitespace is noise.
 * 3. Text that mentions `User` / `用户` is never treated as noise by the remaining rules.
 * 4. Otherwise the text is noise when it matches one of the trivia / one-off-task patterns.
 *
 * @param text - Extraction line, possibly carrying leading `[date]` tags.
 * @returns `true` when the line should be discarded.
 */
function isNoiseExtractionText(text: string): boolean {
  const core = stripExtractionDatePrefixes(text).trim();
  if (!core) {
    return true;
  }

  // No `\b` after the greeting: CJK characters are not word characters, so `你好\b` can never
  // match at end of input. Anchoring the tail to punctuation/whitespace already excludes
  // longer words such as "high". \uFF01 / \uFF1F are the full-width "!" and "?".
  if (/^(?:你好|您好|hi|hello|hey)[!\uFF01。.?\uFF1F\s]*$/iu.test(core)) {
    return true;
  }

  // Anything that talks about the User may carry profile information; keep it.
  if (/\bUser\b|用户/.test(core)) {
    return false;
  }

  const noisePatterns: RegExp[] = [
    // Generic astronomy trivia.
    /地球围绕太阳|地球绕着太阳|围着太阳转|绕太阳/u,
    // Arithmetic trivia.
    /^1\s*\+\s*1\b/u,
    // Pure technical Q&A.
    /python.*list.*tuple|list\s+and\s+tuple.*(区别|difference)/iu,
    // One-off requests; case-insensitive like the other English patterns.
    /(帮我)?查一下.*天气|check.*weather/iu,
    /翻译一下这句话|translate\s+(this|the\s+sentence)/iu,
    /帮我把这段代码|bug\s*修|生成.*ppt|ppt\s*大纲|outline\s*for\s*a\s*ppt/iu,
  ];
  return noisePatterns.some((pattern) => pattern.test(core));
}
|
|
1333
|
+
|
|
1334
|
+
/** Returns, in order, the items whose text `isNoiseExtractionText` does not classify as noise. */
function filterNoiseExtractions(items: LLMExtractionItem[]): LLMExtractionItem[] {
  const meaningful: LLMExtractionItem[] = [];
  for (const candidate of items) {
    if (isNoiseExtractionText(candidate.text)) {
      continue;
    }
    meaningful.push(candidate);
  }
  return meaningful;
}
|
|
1337
|
+
|
|
1338
|
+
/**
 * Min cosine-similarity (0–1) at which a candidate insert counts as a near-duplicate of
 * existing user memory in the same session.
 * Derived as `cfg.similarityThresholdUserMemory + 0.28`, clamped to the range [0.78, 0.91].
 */
function nearDuplicateUserInsertMinScore(cfg: MemoryConfig): number {
  const lowerBound = 0.78;
  const upperBound = 0.91;
  const shifted = cfg.similarityThresholdUserMemory + 0.28;
  const raisedToLowerBound = Math.max(lowerBound, shifted);
  return Math.min(upperBound, raisedToLowerBound);
}
|
|
1342
|
+
|
|
1343
|
+
/**
 * Minimum score at which a candidate world-fact insert counts as a near-duplicate of an
 * existing world fact in the same session.
 * Derived as `cfg.similarityThresholdUserMemory + 0.22`, clamped to the range [0.82, 0.91].
 */
function nearDuplicateWorldInsertMinScore(cfg: MemoryConfig): number {
  const lowerBound = 0.82;
  const upperBound = 0.91;
  const shifted = cfg.similarityThresholdUserMemory + 0.22;
  const raisedToLowerBound = Math.max(lowerBound, shifted);
  return Math.min(upperBound, raisedToLowerBound);
}
|
|
1346
|
+
|
|
1347
|
+
/**
 * Returns the text variants that should be treated as the same memory for exact dedup:
 * the trimmed text and, when different and non-empty, the trimmed text without its trailing
 * sentence punctuation.
 *
 * Both ASCII (`. ! ? ;`) and CJK / full-width (`。 ． ！ ？ ；`) marks are stripped, including
 * marks separated by whitespace (for example `"tea . !"`).
 *
 * @param text - Memory text.
 * @returns `[trimmed]`, or `[trimmed, withoutTrailingPunctuation]`.
 */
function userMemoryTextDedupVariants(text: string): string[] {
  const trimmed = text.trim();
  // \uFF0E \uFF01 \uFF1F \uFF1B are the full-width forms of ". ! ? ;".
  const withoutTrailingPunctuation = trimmed.replace(/[\s。.\uFF0E!\uFF01?\uFF1F;\uFF1B]+$/u, "");
  if (withoutTrailingPunctuation.length === 0 || withoutTrailingPunctuation === trimmed) {
    return [trimmed];
  }
  return [trimmed, withoutTrailingPunctuation];
}
|
|
1359
|
+
|
|
1360
|
+
/**
 * Resolves to `true` as soon as `db.existsSemanticDuplicate` reports a duplicate for any of
 * the text variants produced by `userMemoryTextDedupVariants`. Variants are checked one at a
 * time, in order, and the remaining ones are skipped after the first match.
 */
async function existsUserSemanticDuplicateAnyVariant(
  db: MemoryDB,
  agentId: string,
  sessionKey: string,
  category: UserMemoryCategory,
  memText: string,
): Promise<boolean> {
  const variants = userMemoryTextDedupVariants(memText);
  for (let i = 0; i < variants.length; i++) {
    const alreadyStored = await db.existsSemanticDuplicate(agentId, sessionKey, category, variants[i]!);
    if (alreadyStored) {
      return true;
    }
  }
  return false;
}
|
|
1374
|
+
|
|
1375
|
+
/**
 * Normalizes a personal memory line for storage. Personal memories must not carry date tags
 * and must start with `User` or `用户` per the iteration guide.
 *
 * Steps:
 * 1. Strip leading bracket tags, apply Unicode NFC, collapse whitespace runs to one space.
 * 2. Results shorter than two characters are returned as-is.
 * 3. Text that already starts with `用户` is returned unchanged; a leading `user` in any
 *    letter case is canonicalised to `User` instead of being prefixed a second time.
 * 4. Text containing Han characters is prefixed with `用户`, replacing a leading first-person
 *    `我` / `我的`. A leading `我们` is left intact so it does not become `用户们`.
 * 5. Any other text is prefixed with `User `.
 *
 * @param text - Memory text as produced by the extractor or the userImage LLM.
 * @returns The normalized text.
 */
function normalizePersonalMemoryTextForStore(text: string): string {
  // `\s` already matches U+00A0, so no-break spaces are collapsed by the same replace.
  const cleaned = stripExtractionDatePrefixes(text).normalize("NFC").replace(/\s+/g, " ").trim();
  if (cleaned.length < 2) {
    return cleaned;
  }
  if (/^用户/u.test(cleaned)) {
    return cleaned;
  }
  if (/^user\b/iu.test(cleaned)) {
    // Matched prefix is exactly the four letters of "user".
    return `User${cleaned.slice(4)}`;
  }
  if (/\p{Script=Han}/u.test(cleaned)) {
    const body = cleaned.replace(/^我(?!们)(?:的)?/u, "").trim();
    // If stripping the pronoun leaves nothing, keep the original text after the subject.
    return `用户${body.length > 0 ? body : cleaned}`;
  }
  return `User ${cleaned}`;
}
|
|
1401
|
+
|
|
1217
1402
|
/** User memory from raw user message texts (agent_end user delta). */
|
|
1218
1403
|
async function captureUserMemoryFromInboundTexts(
|
|
1219
1404
|
cfg: MemoryConfig,
|
|
@@ -1261,7 +1446,7 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1261
1446
|
const toSend = texts.filter((t) => t.length >= 5 && t.length <= cfg.captureMaxChars);
|
|
1262
1447
|
if (toSend.length === 0) return;
|
|
1263
1448
|
|
|
1264
|
-
|
|
1449
|
+
let extractions = await extractUserMemoriesWithLLM(
|
|
1265
1450
|
cfg.llm,
|
|
1266
1451
|
toSend,
|
|
1267
1452
|
MAX_AUTO_CAPTURE_LLM,
|
|
@@ -1271,17 +1456,29 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1271
1456
|
});
|
|
1272
1457
|
if (extractions.length === 0) return;
|
|
1273
1458
|
|
|
1459
|
+
extractions = filterNoiseExtractions(extractions);
|
|
1460
|
+
if (extractions.length === 0) return;
|
|
1461
|
+
|
|
1274
1462
|
// ---- Always split: User-related items vs event items ----
|
|
1275
|
-
|
|
1276
|
-
|
|
1463
|
+
let userItems: LLMExtractionItem[] = [];
|
|
1464
|
+
let eventItems: LLMExtractionItem[] = [];
|
|
1465
|
+
// Match both English "User" and Chinese "用户" to correctly route user preferences
|
|
1466
|
+
const USER_SUBJECT_RE = /\bUser\b|用户/;
|
|
1277
1467
|
for (const item of extractions) {
|
|
1278
|
-
if (
|
|
1468
|
+
if (USER_SUBJECT_RE.test(item.text)) {
|
|
1279
1469
|
userItems.push(item);
|
|
1280
1470
|
} else {
|
|
1281
1471
|
eventItems.push(item);
|
|
1282
1472
|
}
|
|
1283
1473
|
}
|
|
1284
1474
|
|
|
1475
|
+
// Same `agent_end` batch: do not send facts to world that duplicate user-route extractions
|
|
1476
|
+
eventItems = filterWorldItemsAgainstUserItems(eventItems, userItems);
|
|
1477
|
+
|
|
1478
|
+
// Collapse subset-redundant lines from a single extraction pass (reduces userImage / worldImage churn)
|
|
1479
|
+
userItems = dedupeExtractionBatchBySubstringContainment(userItems, 14);
|
|
1480
|
+
eventItems = dedupeExtractionBatchBySubstringContainment(eventItems, 22);
|
|
1481
|
+
|
|
1285
1482
|
// ---- Parallel: event-item pipeline & user-item pipeline ----
|
|
1286
1483
|
const eventPipeline = async () => {
|
|
1287
1484
|
if (eventItems.length === 0) return;
|
|
@@ -1293,6 +1490,18 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1293
1490
|
if (await db.existsSemanticDuplicate(agentId, sessionKey, e.category, text)) {
|
|
1294
1491
|
continue;
|
|
1295
1492
|
}
|
|
1493
|
+
const { vectors } = await backend.encodeForStorage(text);
|
|
1494
|
+
if (
|
|
1495
|
+
isWorldFact(e.category) &&
|
|
1496
|
+
(await db.hasHighlySimilarWorldFactInSession(
|
|
1497
|
+
agentId,
|
|
1498
|
+
sessionKey,
|
|
1499
|
+
vectors,
|
|
1500
|
+
nearDuplicateWorldInsertMinScore(cfg),
|
|
1501
|
+
))
|
|
1502
|
+
) {
|
|
1503
|
+
continue;
|
|
1504
|
+
}
|
|
1296
1505
|
await storeOneCaptureItem(agentId, { category: e.category, text, importance: e.importance }, cfg, db, backend, {
|
|
1297
1506
|
userId,
|
|
1298
1507
|
sessionId: sessionKey,
|
|
@@ -1313,7 +1522,7 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1313
1522
|
|
|
1314
1523
|
// 2. Per-item recall: for each event item recall top-3 similar existing world_facts, then dedup
|
|
1315
1524
|
const recallMinScore = Math.max(0.5, cfg.similarityThresholdUserMemory - 0.35);
|
|
1316
|
-
const PER_ITEM_RECALL =
|
|
1525
|
+
const PER_ITEM_RECALL = 10;
|
|
1317
1526
|
const candidateMap = new Map<string, MemorySearchResult>();
|
|
1318
1527
|
for (const er of embeddingResults) {
|
|
1319
1528
|
const perItemHits = er.vectors.length > 0
|
|
@@ -1363,15 +1572,17 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1363
1572
|
continue;
|
|
1364
1573
|
}
|
|
1365
1574
|
|
|
1575
|
+
const worldText = truncateForCapture(action.text, cfg.captureMaxChars);
|
|
1576
|
+
|
|
1366
1577
|
if (action.action === "update") {
|
|
1367
1578
|
const hit = existingCandidates.find((c) => c.entry.id === action.memoryId);
|
|
1368
1579
|
if (hit) {
|
|
1369
1580
|
await deleteSimilarLogicalMemory(db, agentId, hit.entry.sessionId, hit);
|
|
1370
1581
|
deleteCount++;
|
|
1371
1582
|
}
|
|
1372
|
-
const { vectors } = await backend.encodeForStorage(
|
|
1583
|
+
const { vectors } = await backend.encodeForStorage(worldText);
|
|
1373
1584
|
const rows = buildChunkRows(
|
|
1374
|
-
{ category: WORLD_FACT as MemoryCategory, text:
|
|
1585
|
+
{ category: WORLD_FACT as MemoryCategory, text: worldText, importance: action.importance },
|
|
1375
1586
|
vectors,
|
|
1376
1587
|
{ userId, sessionId: sessionKey },
|
|
1377
1588
|
);
|
|
@@ -1379,9 +1590,19 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1379
1590
|
insertCount++;
|
|
1380
1591
|
} else {
|
|
1381
1592
|
// insert
|
|
1382
|
-
|
|
1593
|
+
if (await db.existsSemanticDuplicate(agentId, sessionKey, WORLD_FACT, worldText)) {
|
|
1594
|
+
continue;
|
|
1595
|
+
}
|
|
1596
|
+
const { vectors } = await backend.encodeForStorage(worldText);
|
|
1597
|
+
const wMin = nearDuplicateWorldInsertMinScore(cfg);
|
|
1598
|
+
if (await db.hasHighlySimilarWorldFactInSession(agentId, sessionKey, vectors, wMin)) {
|
|
1599
|
+
console.debug(
|
|
1600
|
+
`[openclaw-memory-alibaba-local] skip world insert (near-duplicate in session, minScore=${wMin})`,
|
|
1601
|
+
);
|
|
1602
|
+
continue;
|
|
1603
|
+
}
|
|
1383
1604
|
const rows = buildChunkRows(
|
|
1384
|
-
{ category: WORLD_FACT as MemoryCategory, text:
|
|
1605
|
+
{ category: WORLD_FACT as MemoryCategory, text: worldText, importance: action.importance },
|
|
1385
1606
|
vectors,
|
|
1386
1607
|
{ userId, sessionId: sessionKey },
|
|
1387
1608
|
);
|
|
@@ -1409,8 +1630,13 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1409
1630
|
if (!cfg.llm) {
|
|
1410
1631
|
console.warn(`[openclaw-memory-alibaba-local] no LLM configured, user items bypass UserImageExtraction`);
|
|
1411
1632
|
for (const e of userItems) {
|
|
1412
|
-
const text = truncateForCapture(e.text, cfg.captureMaxChars);
|
|
1413
|
-
if (await db
|
|
1633
|
+
const text = truncateForCapture(normalizePersonalMemoryTextForStore(e.text), cfg.captureMaxChars);
|
|
1634
|
+
if (await existsUserSemanticDuplicateAnyVariant(db, agentId, sessionKey, e.category, text)) {
|
|
1635
|
+
continue;
|
|
1636
|
+
}
|
|
1637
|
+
const { vectors } = await backend.encodeForStorage(text);
|
|
1638
|
+
const ndMin = nearDuplicateUserInsertMinScore(cfg);
|
|
1639
|
+
if (await db.hasHighlySimilarUserMemoryInSession(agentId, sessionKey, vectors, ndMin)) {
|
|
1414
1640
|
continue;
|
|
1415
1641
|
}
|
|
1416
1642
|
await storeOneCaptureItem(agentId, { category: e.category, text, importance: e.importance }, cfg, db, backend, {
|
|
@@ -1424,17 +1650,17 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1424
1650
|
// 1. Batch embed all new extractions
|
|
1425
1651
|
const embeddingResults: { item: LLMExtractionItem; vectors: number[][] }[] = [];
|
|
1426
1652
|
for (const item of userItems) {
|
|
1427
|
-
const truncated = truncateForCapture(item.text, cfg.captureMaxChars);
|
|
1653
|
+
const truncated = truncateForCapture(normalizePersonalMemoryTextForStore(item.text), cfg.captureMaxChars);
|
|
1428
1654
|
const { vectors } = await backend.encodeForStorage(truncated);
|
|
1429
1655
|
embeddingResults.push({ item: { ...item, text: truncated }, vectors });
|
|
1430
1656
|
}
|
|
1431
1657
|
|
|
1432
|
-
// 2. Recall top
|
|
1658
|
+
// 2. Recall top similar existing memories for ALL new extractions (agentId global, USER_MEMORY scope)
|
|
1433
1659
|
const allVectors = embeddingResults.flatMap((r) => r.vectors);
|
|
1434
1660
|
const recallMinScore = Math.max(0.5, cfg.similarityThresholdUserMemory - 0.35);
|
|
1435
1661
|
console.debug(`[openclaw-memory-alibaba-local] userImageExtraction recall: ${allVectors.length} query vectors, minScore=${recallMinScore}`);
|
|
1436
1662
|
const existingCandidates = allVectors.length > 0
|
|
1437
|
-
? await db.searchMerged(agentId, allVectors,
|
|
1663
|
+
? await db.searchMerged(agentId, allVectors, 40, recallMinScore, [...USER_MEMORY_CATEGORIES])
|
|
1438
1664
|
: [];
|
|
1439
1665
|
if (existingCandidates.length > 0) {
|
|
1440
1666
|
console.debug(`[openclaw-memory-alibaba-local] userImageExtraction recall found ${existingCandidates.length} candidates: ${existingCandidates.map((c) => `[${c.score.toFixed(3)}] ${c.entry.text.slice(0, 60)}`).join(" | ")}`);
|
|
@@ -1469,23 +1695,41 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1469
1695
|
continue;
|
|
1470
1696
|
}
|
|
1471
1697
|
|
|
1698
|
+
const memText = truncateForCapture(
|
|
1699
|
+
normalizePersonalMemoryTextForStore(action.text),
|
|
1700
|
+
cfg.captureMaxChars,
|
|
1701
|
+
);
|
|
1702
|
+
|
|
1703
|
+
if (action.action === "insert") {
|
|
1704
|
+
if (await existsUserSemanticDuplicateAnyVariant(db, agentId, sessionKey, action.category, memText)) {
|
|
1705
|
+
continue;
|
|
1706
|
+
}
|
|
1707
|
+
}
|
|
1708
|
+
|
|
1472
1709
|
if (action.action === "update") {
|
|
1473
1710
|
const hit = existingCandidates.find((c) => c.entry.id === action.memoryId);
|
|
1474
1711
|
if (hit) {
|
|
1475
1712
|
await deleteSimilarLogicalMemory(db, agentId, hit.entry.sessionId, hit);
|
|
1476
1713
|
}
|
|
1477
|
-
const { vectors } = await backend.encodeForStorage(
|
|
1714
|
+
const { vectors } = await backend.encodeForStorage(memText);
|
|
1478
1715
|
const rows = buildChunkRows(
|
|
1479
|
-
{ category: action.category, text:
|
|
1716
|
+
{ category: action.category, text: memText, importance: action.importance },
|
|
1480
1717
|
vectors,
|
|
1481
1718
|
{ userId, sessionId: sessionKey },
|
|
1482
1719
|
);
|
|
1483
1720
|
await db.storeMany(agentId, rows);
|
|
1484
1721
|
} else {
|
|
1485
1722
|
// insert
|
|
1486
|
-
const { vectors } = await backend.encodeForStorage(
|
|
1723
|
+
const { vectors } = await backend.encodeForStorage(memText);
|
|
1724
|
+
const ndMin = nearDuplicateUserInsertMinScore(cfg);
|
|
1725
|
+
if (await db.hasHighlySimilarUserMemoryInSession(agentId, sessionKey, vectors, ndMin)) {
|
|
1726
|
+
console.debug(
|
|
1727
|
+
`[openclaw-memory-alibaba-local] skip user insert (near-duplicate in session, minScore=${ndMin})`,
|
|
1728
|
+
);
|
|
1729
|
+
continue;
|
|
1730
|
+
}
|
|
1487
1731
|
const rows = buildChunkRows(
|
|
1488
|
-
{ category: action.category, text:
|
|
1732
|
+
{ category: action.category, text: memText, importance: action.importance },
|
|
1489
1733
|
vectors,
|
|
1490
1734
|
{ userId, sessionId: sessionKey },
|
|
1491
1735
|
);
|
|
@@ -1646,21 +1890,16 @@ async function storeOneCaptureItem(
|
|
|
1646
1890
|
if (vectors.length === 0) {
|
|
1647
1891
|
throw new Error("openclaw-memory-alibaba-local: encodeForStorage returned no vectors");
|
|
1648
1892
|
}
|
|
1649
|
-
const threshold = getThresholdForCategory(cfg, item.category);
|
|
1650
1893
|
const dedupCategories = getDedupCategories(item.category);
|
|
1651
1894
|
const rows = buildChunkRows(item, vectors, options);
|
|
1652
1895
|
|
|
1653
|
-
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
}
|
|
1661
|
-
|
|
1662
|
-
// With conflict_process: simple similarity-based dedup (batch merge handles LLM dedup for user memory)
|
|
1663
|
-
const recallMinScore = Math.max(0.5, threshold - 0.35);
|
|
1896
|
+
// Guard against near-exact duplicates only (score >= 0.92).
|
|
1897
|
+
// Lower thresholds (e.g. 0.65) would incorrectly treat similar-but-different
|
|
1898
|
+
// items as duplicates (e.g. "likes apples" vs "likes fish").
|
|
1899
|
+
// Semantic dedup for lower-similarity candidates is handled by the LLM CRUD
|
|
1900
|
+
// pipeline (userPipeline / eventPipeline) when memory_duplication_conflict_process is on.
|
|
1901
|
+
const NEAR_EXACT_THRESHOLD = 0.92;
|
|
1902
|
+
const recallMinScore = Math.max(0.5, NEAR_EXACT_THRESHOLD - 0.15);
|
|
1664
1903
|
const candidates = await db.searchMerged(
|
|
1665
1904
|
agentId,
|
|
1666
1905
|
vectors,
|
|
@@ -1668,11 +1907,11 @@ async function storeOneCaptureItem(
|
|
|
1668
1907
|
recallMinScore,
|
|
1669
1908
|
[...dedupCategories],
|
|
1670
1909
|
);
|
|
1671
|
-
if (candidates.length > 0 && candidates[0]!.score >=
|
|
1910
|
+
if (candidates.length > 0 && candidates[0]!.score >= NEAR_EXACT_THRESHOLD) {
|
|
1672
1911
|
await deleteSimilarLogicalMemory(db, agentId, options?.sessionId, candidates[0]!);
|
|
1673
1912
|
}
|
|
1674
1913
|
const stored = await db.storeMany(agentId, rows);
|
|
1675
|
-
return { action: candidates.length > 0 && candidates[0]!.score >=
|
|
1914
|
+
return { action: candidates.length > 0 && candidates[0]!.score >= NEAR_EXACT_THRESHOLD ? "updated" : "created", entry: stored[0]! };
|
|
1676
1915
|
}
|
|
1677
1916
|
|
|
1678
1917
|
// ---------------------------------------------------------------------------
|
|
@@ -1791,7 +2030,7 @@ const memoryPlugin = {
|
|
|
1791
2030
|
}
|
|
1792
2031
|
|
|
1793
2032
|
const getDbAndBackend = (): { db: MemoryDB; backend: EmbeddingBackend } | null =>
|
|
1794
|
-
backend ? { db, backend } : null;
|
|
2033
|
+
backend && db ? { db, backend } : null;
|
|
1795
2034
|
|
|
1796
2035
|
const memoryAdminOpts = backend
|
|
1797
2036
|
? {
|
|
@@ -2181,6 +2420,7 @@ const memoryPlugin = {
|
|
|
2181
2420
|
|
|
2182
2421
|
if (cfg.autoCapture) {
|
|
2183
2422
|
api.on("agent_end", async (event, ctx) => {
|
|
2423
|
+
console.log("[openclaw-memory-alibaba-local] smoke: agent_end hook invoked");
|
|
2184
2424
|
if (!db || !backend) {
|
|
2185
2425
|
return;
|
|
2186
2426
|
}
|
package/package.json
CHANGED
package/prompts.ts
CHANGED
|
@@ -33,6 +33,7 @@ Rewrite the input into clear, complete sentences and output memory entries.
|
|
|
33
33
|
3) TEMPORAL: Prefix every entry with a [date] tag — mandatory.
|
|
34
34
|
4) SUBJECT: Every entry must have an explicit subject after the date prefix.
|
|
35
35
|
5) NO SUBSUMPTION: No entry should be a subset of another entry in this batch. If one sentence already covers the information, do not emit a narrower duplicate.
|
|
36
|
+
6) **Single-message User facts**: When one user utterance states several **independent** User facts, still emit **one line per distinct topic**, but never two lines where one is a **clear subset** of the other — merge sub-facts into the broader line for that topic.
|
|
36
37
|
|
|
37
38
|
# Date Tag Rules
|
|
38
39
|
- Explicit absolute date -> [YYYY-MM-DD], [YYYY-MM], or [YYYY]
|
|
@@ -42,9 +43,12 @@ Rewrite the input into clear, complete sentences and output memory entries.
|
|
|
42
43
|
Do NOT convert relative dates to absolute. No entry may be undated.
|
|
43
44
|
|
|
44
45
|
# Subject Rules
|
|
45
|
-
- If the API caller's own information is explicitly stated (e.g., "I/my/me") -> use
|
|
46
|
+
- If the API caller's own information is explicitly stated (e.g., "I/my/me", Chinese 我/我的/本人) -> you MUST use an explicit subject:
|
|
47
|
+
- English entries: start the sentence (after the date tag) with **User** (e.g. "User's name is …", "User works in …").
|
|
48
|
+
- Chinese entries: start the sentence (after the date tag) with **用户** (e.g. "用户名字是…", "用户在杭州工作").
|
|
46
49
|
- Otherwise, use actual names/nicknames/roles as stated in the text.
|
|
47
50
|
- Do not infer that a named person is the API caller unless the text explicitly indicates it.
|
|
51
|
+
- **No cross-pipeline duplication**: If a fact is about the API caller, emit it ONLY as a User/用户-subject entry. Do NOT also emit a separate entry that restates the same fact as a third-party or generic sentence in the same batch.
|
|
48
52
|
|
|
49
53
|
# Sentence Rewrite Rules
|
|
50
54
|
- Clean up casual or fragmented language into well-formed statements.
|
|
@@ -64,8 +68,11 @@ Do NOT convert relative dates to absolute. No entry may be undated.
|
|
|
64
68
|
|
|
65
69
|
# Exclude
|
|
66
70
|
- Passwords, API keys, credentials
|
|
67
|
-
- Pure small talk ("Bye!", "Take care!")
|
|
68
|
-
- Conversational reactions that carry no new factual information (e.g., "That's great!", "Congrats!", "Sorry to hear that", "Thanks!", expressions of agreement or sympathy without new content)
|
|
71
|
+
- Pure small talk ("Bye!", "Take care!", isolated "你好" / "Hi" with no other content)
|
|
72
|
+
- Conversational reactions that carry no new factual information (e.g., "That's great!", "Congrats!", "Sorry to hear that", "Thanks!", expressions of agreement or sympathy without new content)
|
|
73
|
+
- **Standalone general knowledge** not tied to the User's life, plans, preferences, or identity (e.g. "Earth orbits the Sun in one year", "1+1=2", textbook facts) — omit entirely unless the User explicitly relates the fact to themselves
|
|
74
|
+
- **One-off task requests** with no durable personal stake (e.g. "check today's weather", "translate this sentence", "fix this bug", "generate a PPT outline") — omit unless they reveal lasting preferences or constraints
|
|
75
|
+
- **Pure technical Q&A** with no personal profile content (e.g. language syntax comparisons) — omit`;
|
|
69
76
|
|
|
70
77
|
export const MEMORY_EXTRACTION_FORMAT = `
|
|
71
78
|
|
|
@@ -201,12 +208,18 @@ Only INSERT or UPDATE information that reveals something lasting about the User:
|
|
|
201
208
|
- One-time commands or ephemeral task instructions (e.g. "User asked to run command X", "User requested to install Y")
|
|
202
209
|
- Meta-conversation actions (e.g. "User inquired about ...", "User cancelled ...")
|
|
203
210
|
- Generic observations with no personal relevance
|
|
204
|
-
- Information that is already fully covered by a Store item
|
|
211
|
+
- Information that is already fully covered by a Store item — **even if wording differs** (same fact → SKIP, do not INSERT another row)
|
|
212
|
+
- **Rephrasings** of the same fact already in Store (e.g. Store has "用户在杭州工作" and batch says "用户的工作地点是杭州" → SKIP)
|
|
213
|
+
- **Aggressive dedup**: If Store already contains the same fact with different wording, punctuation, or sentence order, you MUST SKIP — do not add another row. When in doubt between INSERT and SKIP for personal profile facts, prefer **SKIP**.
|
|
205
214
|
|
|
206
215
|
# Refinement Principles
|
|
207
216
|
1. **Prefer the richer version**: When a batch item and a Store item describe the same topic, keep whichever has the most information. If the batch adds new details, UPDATE to include them.
|
|
208
|
-
2. **High cohesion**: Only merge entries about the exact same topic. Entries about different topics stay separate.
|
|
209
|
-
3. **
|
|
217
|
+
2. **High cohesion**: Only merge entries about the exact same specific topic. Entries about different topics stay separate.
|
|
218
|
+
3. **Multiple preferences coexist**: Different concrete items under the same category are NOT duplicates. For example, "User likes apples" and "User likes fish" are two separate preferences — INSERT both, do NOT UPDATE or DELETE one for the other. Only UPDATE/DELETE when the new item truly contradicts or refines the old one (e.g. "User no longer likes apples" replaces "User likes apples").
|
|
219
|
+
4. **Strip date prefixes**: Input text may contain [date] or [as of ...] prefixes — remove them from the output text. User profile memories are evergreen and should not carry temporal tags.
|
|
220
|
+
5. **Subject prefix (mandatory)**: Every INSERT/UPDATE output field "text" MUST begin with **User** (English) or **用户** (Chinese) — the same convention as in the extraction stage. Never output first-person "I/我" as the subject for profile facts.
|
|
221
|
+
6. **Contradictions (critical)**: If the batch **replaces or negates** a Store item on the **same habit or stance** (e.g. Store: User runs every morning / 用户晨跑; batch: User now hates running / 用户讨厌跑步 → **DELETE** the old habit and **INSERT** the new stance; do **not** keep both). Diet: "不吃辣" vs "能接受微辣" → **merge** into one current preference and **DELETE** outdated conflicting lines if needed. Relationship: "女朋友小美" vs "已分手" → **DELETE** girlfriend-as-current and **INSERT** breakup / ex status.
|
|
222
|
+
7. **Store hygiene**: If Store already has **multiple** rows that are clearly the **same repeating fact** (near-identical wording or partial duplicates), use **DELETE** on redundant ids and **UPDATE** one survivor — aim for **one row per stable fact topic** when the batch makes that obvious.
|
|
210
223
|
|
|
211
224
|
# Actions (one per batch index)
|
|
212
225
|
- **INSERT**: New lasting personal info not in Store.
|
|
@@ -270,7 +283,9 @@ Only INSERT or UPDATE information that captures a concrete, verifiable fact or e
|
|
|
270
283
|
1. **Prefer the richer version**: When a batch item and a Store item describe the same topic, keep whichever has the most information. If the batch is richer, DELETE the old Store item and INSERT the batch item. If they are roughly equal, UPDATE to merge details.
|
|
271
284
|
2. **Preserve temporal markers**: Keep [as of ...] or [date] prefixes — world facts are time-sensitive.
|
|
272
285
|
3. **High cohesion**: Only merge entries about the exact same event or fact. Different events stay separate even if related.
|
|
273
|
-
4. **
|
|
286
|
+
4. **Multiple items coexist**: Different concrete items under the same category are NOT duplicates. For example, "likes apples" and "likes fish" are two separate facts — INSERT both. Only DELETE when the new item truly contradicts the old one (e.g. a corrected outcome).
|
|
287
|
+
5. **Contradiction = replace**: If a batch item directly contradicts a Store item (e.g. different outcome), DELETE the old item and INSERT the new one.
|
|
288
|
+
6. **Re-ingestion / replay**: If the batch is essentially the **same article or chat** already captured in Store (user pasted it again), prefer **SKIP** or **UPDATE** to enrich — avoid a second nearly identical INSERT when meaning is unchanged.
|
|
274
289
|
|
|
275
290
|
# Actions (one per batch index)
|
|
276
291
|
- **INSERT**: New world fact not in Store.
|