openclaw-memory-alibaba-local 1.0.14 → 1.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/db.ts +55 -0
- package/index.ts +277 -30
- package/package.json +1 -1
- package/prompts.ts +17 -4
package/db.ts
CHANGED
|
@@ -1201,6 +1201,61 @@ export class MemoryDB {
|
|
|
1201
1201
|
return rows.length > 0;
|
|
1202
1202
|
}
|
|
1203
1203
|
|
|
1204
|
+
/**
 * Reports whether this agent already holds a user-memory row in the given session
 * that scores at least `minScore` against the query embeddings.
 * Intended to suppress near-duplicate inserts when the userImage LLM answers
 * "insert" where "skip" or "update" would have been appropriate.
 *
 * @param agentId - Agent whose memories are searched.
 * @param sessionId - Session to match; compared after `normSessionId`.
 * @param vectors - Query embeddings; an empty list short-circuits to `false`.
 * @param minScore - Score a hit must reach to count as a near-duplicate.
 * @returns `true` when at least one same-session hit reaches `minScore`.
 */
async hasHighlySimilarUserMemoryInSession(
  agentId: string,
  sessionId: string,
  vectors: number[][],
  minScore: number,
): Promise<boolean> {
  if (vectors.length === 0) return false;

  await this.ensureInitialized();
  await this.refreshToLatest();

  // searchMerged is not given the session, so session scoping is applied to its hits here.
  // NOTE(review): 28 is presumably the maximum number of merged hits — confirm against searchMerged.
  const candidates = await this.searchMerged(agentId, vectors, 28, minScore, [...USER_MEMORY_CATEGORIES]);
  const wantedSession = normSessionId(sessionId);

  return candidates.some(
    (candidate) => normSessionId(candidate.entry.sessionId) === wantedSession && candidate.score >= minScore,
  );
}
|
|
1231
|
+
|
|
1232
|
+
/**
 * Counterpart of {@link hasHighlySimilarUserMemoryInSession} restricted to `world_fact`
 * rows: reports whether the given session already holds a world fact that scores at
 * least `minScore` against the query embeddings (session-local dedup on re-send).
 *
 * @param agentId - Agent whose memories are searched.
 * @param sessionId - Session to match; compared after `normSessionId`.
 * @param vectors - Query embeddings; an empty list short-circuits to `false`.
 * @param minScore - Score a hit must reach to count as a near-duplicate.
 * @returns `true` when at least one same-session world fact reaches `minScore`.
 */
async hasHighlySimilarWorldFactInSession(
  agentId: string,
  sessionId: string,
  vectors: number[][],
  minScore: number,
): Promise<boolean> {
  if (vectors.length === 0) return false;

  await this.ensureInitialized();
  await this.refreshToLatest();

  // searchMerged is not given the session, so session scoping is applied to its hits here.
  // NOTE(review): 28 is presumably the maximum number of merged hits — confirm against searchMerged.
  const candidates = await this.searchMerged(agentId, vectors, 28, minScore, [WORLD_FACT]);
  const wantedSession = normSessionId(sessionId);

  return candidates.some(
    (candidate) => normSessionId(candidate.entry.sessionId) === wantedSession && candidate.score >= minScore,
  );
}
|
|
1258
|
+
|
|
1204
1259
|
/**
|
|
1205
1260
|
* Vector search with multiple query embeddings; merge by category + text (chunk 行共享同一逻辑正文), keep max score.
|
|
1206
1261
|
*/
|
package/index.ts
CHANGED
|
@@ -525,8 +525,8 @@ async function extractUserMemoriesWithLLM(
|
|
|
525
525
|
for (const item of list) {
|
|
526
526
|
const text = typeof item.text === "string" ? item.text.trim() : "";
|
|
527
527
|
if (text.length >= 10 && text.length <= 2000) {
|
|
528
|
-
// importance: 0.7 if text mentions User
|
|
529
|
-
const importance = /\bUser\b
|
|
528
|
+
// importance: 0.7 if text mentions User / 用户, 0.5 otherwise
|
|
529
|
+
const importance = /\bUser\b|用户/.test(text) ? 0.7 : 0.5;
|
|
530
530
|
out.push({ category: USER_MEMORY_FACT, text, importance });
|
|
531
531
|
}
|
|
532
532
|
}
|
|
@@ -1175,16 +1175,6 @@ async function runAgentEndCapture(
|
|
|
1175
1175
|
|
|
1176
1176
|
console.debug(`[openclaw-memory-alibaba-local] agentEndCapture fullRows=${fullRows.length} userTexts=${userRawTexts.length} uaLines=${uaLines.length}`);
|
|
1177
1177
|
|
|
1178
|
-
// Save cursor BEFORE extraction so that a failure in extraction/storage
|
|
1179
|
-
// does not leave the cursor un-advanced, which would cause duplicate
|
|
1180
|
-
// full_context rows on retry.
|
|
1181
|
-
map[key] = {
|
|
1182
|
-
version: 2,
|
|
1183
|
-
roleCounts: { ...running },
|
|
1184
|
-
lastMessagesLength: messages.length,
|
|
1185
|
-
};
|
|
1186
|
-
saveAgentEndCursorMap(lancedbDir, map);
|
|
1187
|
-
|
|
1188
1178
|
if (fullRows.length > 0) {
|
|
1189
1179
|
await db.storeMany(
|
|
1190
1180
|
agentId,
|
|
@@ -1208,6 +1198,205 @@ async function runAgentEndCapture(
|
|
|
1208
1198
|
captureUserMemoryFromInboundTexts(cfg, db, backend, agentId, sid, userId, userRawTexts),
|
|
1209
1199
|
captureSelfImprovingFromLines(cfg, db, backend, agentId, sid, userId, uaLines),
|
|
1210
1200
|
]);
|
|
1201
|
+
|
|
1202
|
+
map[key] = {
|
|
1203
|
+
version: 2,
|
|
1204
|
+
roleCounts: { ...running },
|
|
1205
|
+
lastMessagesLength: messages.length,
|
|
1206
|
+
};
|
|
1207
|
+
saveAgentEndCursorMap(lancedbDir, map);
|
|
1208
|
+
}
|
|
1209
|
+
|
|
1210
|
+
/**
 * Removes every leading bracketed tag (e.g. `[YYYY-MM-DD]`, `[as of …]`) from a unified
 * extraction line and returns the trimmed remainder.
 *
 * Any `[...]` group at the very start is removed, whatever it contains; brackets that
 * appear later in the text are left alone.
 *
 * @param text - Raw extraction line.
 * @returns The line without its leading bracketed tags, trimmed on both ends.
 */
function stripExtractionDatePrefixes(text: string): string {
  // One anchored pass consumes the whole run of "[tag]" groups together with the
  // whitespace that follows each of them.
  const leadingTags = /^(?:\[[^\]]*\]\s*)+/u;
  return text.trim().replace(leadingTags, "").trim();
}
|
|
1219
|
+
|
|
1220
|
+
/**
 * Reduces an extraction line to a comparison "core" used to judge whether a world-route
 * item and a user-route item from the same `agent_end` batch state the same fact.
 *
 * Leading bracketed tags, the subject words `User` (any case, whole word) and `用户`,
 * and all whitespace are removed; the remainder is lower-cased.
 *
 * @param text - Extraction text.
 * @returns The normalized core (may be empty).
 */
function normalizeCoreForPersonalWorldDedup(text: string): string {
  const untagged = stripExtractionDatePrefixes(text);
  const withoutSubject = untagged.replace(/\bUser\b/gi, "").replace(/用户/g, "");
  const compact = withoutSubject.replace(/\s+/g, "");
  return compact.toLowerCase();
}
|
|
1228
|
+
|
|
1229
|
+
/**
 * Decides whether two extraction lines most likely state the same fact, so that a
 * world-route line duplicating a user-route line can be dropped.
 *
 * Both texts are reduced to a comparison core with `normalizeCoreForPersonalWorldDedup`.
 * Cores shorter than 12 characters never match. Otherwise the lines overlap when one
 * core contains the other, or when more than 55% of the shorter core's character
 * bigrams also occur in the longer core (bigrams are matched as a multiset, so a
 * repeated bigram only counts as often as it appears on both sides).
 *
 * Bigrams are used instead of single characters on purpose: two unrelated sentences
 * written in the same alphabet share nearly all of their individual letters, which
 * would make a per-character ratio report almost every pair as a duplicate.
 *
 * @param a - First extraction text.
 * @param b - Second extraction text.
 * @returns `true` when the two texts are treated as duplicates of each other.
 */
function personalWorldCoreOverlaps(a: string, b: string): boolean {
  const minLen = 12;
  const overlapThreshold = 0.55;

  const ca = normalizeCoreForPersonalWorldDedup(a);
  const cb = normalizeCoreForPersonalWorldDedup(b);
  if (ca.length < minLen || cb.length < minLen) {
    return false;
  }
  if (ca.includes(cb) || cb.includes(ca)) {
    return true;
  }

  // Split by code point so astral characters are not cut into surrogate halves.
  const toBigrams = (core: string): string[] => {
    const chars = [...core];
    const grams: string[] = [];
    for (let i = 0; i + 1 < chars.length; i++) {
      grams.push(`${chars[i]}${chars[i + 1]}`);
    }
    return grams;
  };

  const gramsA = toBigrams(ca);
  const gramsB = toBigrams(cb);
  const [fewer, more] = gramsA.length <= gramsB.length ? [gramsA, gramsB] : [gramsB, gramsA];
  if (fewer.length === 0) {
    return false;
  }

  // Remaining occurrences of each bigram on the larger side.
  const available = new Map<string, number>();
  for (const gram of more) {
    available.set(gram, (available.get(gram) ?? 0) + 1);
  }

  let shared = 0;
  for (const gram of fewer) {
    const left = available.get(gram) ?? 0;
    if (left > 0) {
      shared++;
      available.set(gram, left - 1);
    }
  }

  // Overlap coefficient: shared bigrams relative to the smaller side, always within 0..1.
  return shared / fewer.length > overlapThreshold;
}
|
|
1251
|
+
|
|
1252
|
+
/**
 * Collapses subset-redundant lines produced by a single extraction pass before they
 * are handed to the user/world image LLM.
 *
 * Items are compared on their normalized cores (`normalizeCoreForPersonalWorldDedup`).
 * When one core contains another, only the item with the containing (longer) core is
 * kept; when two cores are identical, the item with the longer raw text is kept and a
 * tie goes to the later item. Items whose core is shorter than `minCoreLen` are never
 * merged, which avoids over-merging short fragments.
 *
 * A surviving item takes the position of the first item it replaces; every other kept
 * item it also covers is removed, so the result contains no pair of eligible items
 * where one core contains the other.
 *
 * @param items - Extraction items in emission order; not mutated.
 * @param minCoreLen - Minimum core length for an item to take part in merging.
 * @returns A new array with redundant items removed.
 */
function dedupeExtractionBatchBySubstringContainment(
  items: LLMExtractionItem[],
  minCoreLen: number,
): LLMExtractionItem[] {
  // Cores are cached next to their items so each text is normalized exactly once.
  const kept: { item: LLMExtractionItem; core: string }[] = [];

  for (const item of items) {
    const core = normalizeCoreForPersonalWorldDedup(item.text);
    const candidate = { item, core };
    if (core.length < minCoreLen) {
      kept.push(candidate);
      continue;
    }

    const isRelated = (other: { item: LLMExtractionItem; core: string }): boolean =>
      other.core.length >= minCoreLen && (core.includes(other.core) || other.core.includes(core));

    // With a containment relation, the longer core is the superset; equal length means equal cores.
    const candidateWins = (other: { item: LLMExtractionItem; core: string }): boolean =>
      core.length > other.core.length ||
      (core.length === other.core.length && item.text.length >= other.item.text.length);

    // An already-kept item covers this one: drop the newcomer.
    if (kept.some((other) => isRelated(other) && !candidateWins(other))) {
      continue;
    }

    const firstCovered = kept.findIndex(isRelated);
    if (firstCovered === -1) {
      kept.push(candidate);
      continue;
    }

    // The newcomer replaces the first item it covers and evicts any later ones it also covers.
    kept[firstCovered] = candidate;
    for (let i = kept.length - 1; i > firstCovered; i--) {
      if (isRelated(kept[i]!)) {
        kept.splice(i, 1);
      }
    }
  }

  return kept.map((entry) => entry.item);
}
|
|
1286
|
+
|
|
1287
|
+
/**
 * Drops world-route items that restate a fact already present among the user-route
 * items of the same batch, as judged by `personalWorldCoreOverlaps`.
 *
 * @param eventItems - World/event extraction items.
 * @param userItems - User-route extraction items.
 * @returns `eventItems` itself when either list is empty, otherwise a new filtered array.
 */
function filterWorldItemsAgainstUserItems(
  eventItems: LLMExtractionItem[],
  userItems: LLMExtractionItem[],
): LLMExtractionItem[] {
  const nothingToCompare = eventItems.length === 0 || userItems.length === 0;
  if (nothingToCompare) {
    return eventItems;
  }

  const kept: LLMExtractionItem[] = [];
  for (const worldItem of eventItems) {
    let duplicatesUserItem = false;
    for (const userItem of userItems) {
      if (personalWorldCoreOverlaps(worldItem.text, userItem.text)) {
        duplicatesUserItem = true;
        break;
      }
    }
    if (!duplicatesUserItem) {
      kept.push(worldItem);
    }
  }
  return kept;
}
|
|
1298
|
+
|
|
1299
|
+
/**
 * A line that is nothing but a greeting, optionally followed by punctuation/whitespace.
 * No `\b` is used after the alternatives: CJK characters are not word characters, so a
 * word boundary can never match after "你好"/"您好", and the anchored tail already
 * rejects longer Latin words such as "hint".
 */
const NOISE_GREETING_ONLY_RE = /^(?:你好|您好|hi|hello|hey)[!\uFF01。.?\uFF1F\s]*$/iu;

/** Trivia and one-off task patterns, applied only to lines that do not mention User/用户. */
const NOISE_SUBJECTLESS_PATTERNS: readonly RegExp[] = [
  // Generic astronomy trivia.
  /地球围绕太阳|地球绕着太阳|围着太阳转|绕太阳/u,
  // "1+1" arithmetic.
  /^1\s*\+\s*1\b/u,
  // Python list vs tuple Q&A.
  /python.*list.*tuple|list\s+and\s+tuple.*(区别|difference)/iu,
  // Weather look-ups (case-insensitive, like the sibling patterns).
  /查一下.*天气|check.*weather/iu,
  // Translation requests.
  /翻译一下这句话|translate\s+(this|the\s+sentence)/iu,
  // Code-fix and PPT-outline requests.
  /帮我把这段代码|bug\s*修|生成.*ppt|ppt\s*大纲|outline\s*for\s*a\s*ppt/iu,
];

/**
 * Hard-drop filter for lines the iteration guide expects to be empty: pure greetings,
 * generic trivia and one-shot task requests. Complements the LLM prompt and blocks
 * mistaken world_fact rows such as generic astronomy facts.
 *
 * Leading bracketed tags are ignored. Greeting-only lines are always noise; the trivia
 * and task patterns apply only when the line does not mention `User` / `用户`.
 *
 * @param text - Extraction text, possibly carrying leading `[date]` tags.
 * @returns `true` when the line should be discarded.
 */
function isNoiseExtractionText(text: string): boolean {
  // stripExtractionDatePrefixes already returns trimmed text.
  const core = stripExtractionDatePrefixes(text);
  if (!core) {
    return true;
  }
  if (NOISE_GREETING_ONLY_RE.test(core)) {
    return true;
  }
  if (/\bUser\b|用户/.test(core)) {
    // Lines about the user are never treated as trivia.
    return false;
  }
  return NOISE_SUBJECTLESS_PATTERNS.some((pattern) => pattern.test(core));
}
|
|
1333
|
+
|
|
1334
|
+
/**
 * Returns the extraction items whose text is not classified as noise by
 * `isNoiseExtractionText`, in their original order.
 *
 * @param items - Extraction items to screen; not mutated.
 * @returns A new array holding only the non-noise items.
 */
function filterNoiseExtractions(items: LLMExtractionItem[]): LLMExtractionItem[] {
  const meaningful: LLMExtractionItem[] = [];
  for (const candidate of items) {
    if (isNoiseExtractionText(candidate.text)) {
      continue;
    }
    meaningful.push(candidate);
  }
  return meaningful;
}
|
|
1337
|
+
|
|
1338
|
+
/**
 * Minimum similarity score for treating a candidate insert as a near-duplicate of an
 * existing user memory in the same session.
 *
 * Computed as `cfg.similarityThresholdUserMemory + 0.28`, clamped to the range 0.78–0.91.
 *
 * @param cfg - Memory configuration providing `similarityThresholdUserMemory`.
 * @returns The clamped near-duplicate threshold.
 */
function nearDuplicateUserInsertMinScore(cfg: MemoryConfig): number {
  const raised = cfg.similarityThresholdUserMemory + 0.28;
  const atLeastFloor = Math.max(0.78, raised);
  return Math.min(0.91, atLeastFloor);
}
|
|
1342
|
+
|
|
1343
|
+
/**
 * Minimum similarity score for treating a candidate world-fact insert as a
 * near-duplicate of an existing world fact in the same session.
 *
 * Computed as `cfg.similarityThresholdUserMemory + 0.22`, clamped to the range 0.82–0.91.
 * NOTE(review): this reads the user-memory threshold; confirm no world-specific
 * setting was intended.
 *
 * @param cfg - Memory configuration providing `similarityThresholdUserMemory`.
 * @returns The clamped near-duplicate threshold.
 */
function nearDuplicateWorldInsertMinScore(cfg: MemoryConfig): number {
  const raised = cfg.similarityThresholdUserMemory + 0.22;
  const atLeastFloor = Math.max(0.82, raised);
  return Math.min(0.91, atLeastFloor);
}
|
|
1346
|
+
|
|
1347
|
+
/**
 * Produces the text variants that count as "the same" for exact dedup: the trimmed
 * text itself and, when it differs, the text without its trailing sentence punctuation.
 *
 * Both half-width (`. ! ? ;`) and full-width CJK forms (`。 ． ！ ？ ；`) are stripped,
 * so "用户喜欢苹果！" and "用户喜欢苹果" compare as equal.
 *
 * @param text - Memory text.
 * @returns `[trimmed]`, or `[trimmed, stripped]` when stripping changes the text and
 *          leaves something behind.
 */
function userMemoryTextDedupVariants(text: string): string[] {
  const t = text.trim();
  // Full-width forms are written as escapes so they survive encoding normalization.
  const trailingPunctuation = /[。.\uFF0E!\uFF01?\uFF1F;\uFF1B]+$/u;
  const stripped = t.replace(trailingPunctuation, "").trim();

  // A punctuation-only text, or one without trailing punctuation, has a single variant.
  if (stripped.length === 0 || stripped === t) {
    return [t];
  }
  return [t, stripped];
}
|
|
1359
|
+
|
|
1360
|
+
/**
 * Checks whether any dedup variant of `memText` (see `userMemoryTextDedupVariants`)
 * is already reported as a semantic duplicate by `db.existsSemanticDuplicate`.
 *
 * Variants are checked one after another and the search stops at the first match.
 *
 * @param db - Memory database to query.
 * @param agentId - Agent owning the memories.
 * @param sessionKey - Session passed through to the duplicate check.
 * @param category - User-memory category passed through to the duplicate check.
 * @param memText - Candidate memory text.
 * @returns `true` as soon as one variant is found to be a duplicate, otherwise `false`.
 */
async function existsUserSemanticDuplicateAnyVariant(
  db: MemoryDB,
  agentId: string,
  sessionKey: string,
  category: UserMemoryCategory,
  memText: string,
): Promise<boolean> {
  const variants = userMemoryTextDedupVariants(memText);
  for (let i = 0; i < variants.length; i++) {
    const isDuplicate = await db.existsSemanticDuplicate(agentId, sessionKey, category, variants[i]!);
    if (isDuplicate) {
      return true;
    }
  }
  return false;
}
|
|
1374
|
+
|
|
1375
|
+
/**
 * Normalizes a personal-memory line before it is stored: personal memories must not
 * carry date tags and must start with `User` or `用户` per the iteration guide.
 *
 * Steps: strip leading bracketed tags, apply Unicode NFC, collapse whitespace
 * (including non-breaking spaces) to single spaces, then ensure the subject prefix:
 * - text already starting with `User` / `用户` is returned as is;
 * - a leading "user" in any other letter case is rewritten to `User`;
 * - text containing Han characters gets `用户`, replacing a leading first-person
 *   `我` / `我的` (but not the plural `我们`, which is kept intact);
 * - anything else gets `User ` prepended.
 *
 * Texts shorter than two characters after cleanup are returned without a prefix.
 *
 * @param text - Extracted or LLM-refined memory text.
 * @returns The cleaned text carrying an explicit user subject.
 */
function normalizePersonalMemoryTextForStore(text: string): string {
  const t = stripExtractionDatePrefixes(text)
    .normalize("NFC")
    .replace(/\u00a0/g, " ")
    .replace(/\s+/g, " ")
    .trim();

  if (t.length < 2) {
    return t;
  }
  if (/^User\b/u.test(t) || /^用户/u.test(t)) {
    return t;
  }
  // "user likes tea" / "USER likes tea": fix the casing instead of stacking a second subject.
  if (/^user\b/iu.test(t)) {
    return `User${t.slice(4)}`;
  }

  const hasCjk = /\p{Script=Han}/u.test(t);
  if (hasCjk) {
    // Replace a leading first-person 我 / 我的; leave 我们 ("we") untouched so it is not
    // truncated to a meaningless "们…".
    const body = t.replace(/^我(?!们)的?/u, "").trim();
    return body.length === 0 ? `用户${t}` : `用户${body}`;
  }
  return `User ${t}`;
}
|
|
1212
1401
|
|
|
1213
1402
|
/** User memory from raw user message texts (agent_end user delta). */
|
|
@@ -1257,7 +1446,7 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1257
1446
|
const toSend = texts.filter((t) => t.length >= 5 && t.length <= cfg.captureMaxChars);
|
|
1258
1447
|
if (toSend.length === 0) return;
|
|
1259
1448
|
|
|
1260
|
-
|
|
1449
|
+
let extractions = await extractUserMemoriesWithLLM(
|
|
1261
1450
|
cfg.llm,
|
|
1262
1451
|
toSend,
|
|
1263
1452
|
MAX_AUTO_CAPTURE_LLM,
|
|
@@ -1267,9 +1456,12 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1267
1456
|
});
|
|
1268
1457
|
if (extractions.length === 0) return;
|
|
1269
1458
|
|
|
1459
|
+
extractions = filterNoiseExtractions(extractions);
|
|
1460
|
+
if (extractions.length === 0) return;
|
|
1461
|
+
|
|
1270
1462
|
// ---- Always split: User-related items vs event items ----
|
|
1271
|
-
|
|
1272
|
-
|
|
1463
|
+
let userItems: LLMExtractionItem[] = [];
|
|
1464
|
+
let eventItems: LLMExtractionItem[] = [];
|
|
1273
1465
|
// Match both English "User" and Chinese "用户" to correctly route user preferences
|
|
1274
1466
|
const USER_SUBJECT_RE = /\bUser\b|用户/;
|
|
1275
1467
|
for (const item of extractions) {
|
|
@@ -1280,6 +1472,13 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1280
1472
|
}
|
|
1281
1473
|
}
|
|
1282
1474
|
|
|
1475
|
+
// Same `agent_end` batch: do not send facts to world that duplicate user-route extractions
|
|
1476
|
+
eventItems = filterWorldItemsAgainstUserItems(eventItems, userItems);
|
|
1477
|
+
|
|
1478
|
+
// Collapse subset-redundant lines from a single extraction pass (reduces userImage / worldImage churn)
|
|
1479
|
+
userItems = dedupeExtractionBatchBySubstringContainment(userItems, 14);
|
|
1480
|
+
eventItems = dedupeExtractionBatchBySubstringContainment(eventItems, 22);
|
|
1481
|
+
|
|
1283
1482
|
// ---- Parallel: event-item pipeline & user-item pipeline ----
|
|
1284
1483
|
const eventPipeline = async () => {
|
|
1285
1484
|
if (eventItems.length === 0) return;
|
|
@@ -1291,6 +1490,18 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1291
1490
|
if (await db.existsSemanticDuplicate(agentId, sessionKey, e.category, text)) {
|
|
1292
1491
|
continue;
|
|
1293
1492
|
}
|
|
1493
|
+
const { vectors } = await backend.encodeForStorage(text);
|
|
1494
|
+
if (
|
|
1495
|
+
isWorldFact(e.category) &&
|
|
1496
|
+
(await db.hasHighlySimilarWorldFactInSession(
|
|
1497
|
+
agentId,
|
|
1498
|
+
sessionKey,
|
|
1499
|
+
vectors,
|
|
1500
|
+
nearDuplicateWorldInsertMinScore(cfg),
|
|
1501
|
+
))
|
|
1502
|
+
) {
|
|
1503
|
+
continue;
|
|
1504
|
+
}
|
|
1294
1505
|
await storeOneCaptureItem(agentId, { category: e.category, text, importance: e.importance }, cfg, db, backend, {
|
|
1295
1506
|
userId,
|
|
1296
1507
|
sessionId: sessionKey,
|
|
@@ -1311,7 +1522,7 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1311
1522
|
|
|
1312
1523
|
// 2. Per-item recall: for each event item recall top-3 similar existing world_facts, then dedup
|
|
1313
1524
|
const recallMinScore = Math.max(0.5, cfg.similarityThresholdUserMemory - 0.35);
|
|
1314
|
-
const PER_ITEM_RECALL =
|
|
1525
|
+
const PER_ITEM_RECALL = 10;
|
|
1315
1526
|
const candidateMap = new Map<string, MemorySearchResult>();
|
|
1316
1527
|
for (const er of embeddingResults) {
|
|
1317
1528
|
const perItemHits = er.vectors.length > 0
|
|
@@ -1361,15 +1572,17 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1361
1572
|
continue;
|
|
1362
1573
|
}
|
|
1363
1574
|
|
|
1575
|
+
const worldText = truncateForCapture(action.text, cfg.captureMaxChars);
|
|
1576
|
+
|
|
1364
1577
|
if (action.action === "update") {
|
|
1365
1578
|
const hit = existingCandidates.find((c) => c.entry.id === action.memoryId);
|
|
1366
1579
|
if (hit) {
|
|
1367
1580
|
await deleteSimilarLogicalMemory(db, agentId, hit.entry.sessionId, hit);
|
|
1368
1581
|
deleteCount++;
|
|
1369
1582
|
}
|
|
1370
|
-
const { vectors } = await backend.encodeForStorage(
|
|
1583
|
+
const { vectors } = await backend.encodeForStorage(worldText);
|
|
1371
1584
|
const rows = buildChunkRows(
|
|
1372
|
-
{ category: WORLD_FACT as MemoryCategory, text:
|
|
1585
|
+
{ category: WORLD_FACT as MemoryCategory, text: worldText, importance: action.importance },
|
|
1373
1586
|
vectors,
|
|
1374
1587
|
{ userId, sessionId: sessionKey },
|
|
1375
1588
|
);
|
|
@@ -1377,9 +1590,19 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1377
1590
|
insertCount++;
|
|
1378
1591
|
} else {
|
|
1379
1592
|
// insert
|
|
1380
|
-
|
|
1593
|
+
if (await db.existsSemanticDuplicate(agentId, sessionKey, WORLD_FACT, worldText)) {
|
|
1594
|
+
continue;
|
|
1595
|
+
}
|
|
1596
|
+
const { vectors } = await backend.encodeForStorage(worldText);
|
|
1597
|
+
const wMin = nearDuplicateWorldInsertMinScore(cfg);
|
|
1598
|
+
if (await db.hasHighlySimilarWorldFactInSession(agentId, sessionKey, vectors, wMin)) {
|
|
1599
|
+
console.debug(
|
|
1600
|
+
`[openclaw-memory-alibaba-local] skip world insert (near-duplicate in session, minScore=${wMin})`,
|
|
1601
|
+
);
|
|
1602
|
+
continue;
|
|
1603
|
+
}
|
|
1381
1604
|
const rows = buildChunkRows(
|
|
1382
|
-
{ category: WORLD_FACT as MemoryCategory, text:
|
|
1605
|
+
{ category: WORLD_FACT as MemoryCategory, text: worldText, importance: action.importance },
|
|
1383
1606
|
vectors,
|
|
1384
1607
|
{ userId, sessionId: sessionKey },
|
|
1385
1608
|
);
|
|
@@ -1407,8 +1630,13 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1407
1630
|
if (!cfg.llm) {
|
|
1408
1631
|
console.warn(`[openclaw-memory-alibaba-local] no LLM configured, user items bypass UserImageExtraction`);
|
|
1409
1632
|
for (const e of userItems) {
|
|
1410
|
-
const text = truncateForCapture(e.text, cfg.captureMaxChars);
|
|
1411
|
-
if (await db
|
|
1633
|
+
const text = truncateForCapture(normalizePersonalMemoryTextForStore(e.text), cfg.captureMaxChars);
|
|
1634
|
+
if (await existsUserSemanticDuplicateAnyVariant(db, agentId, sessionKey, e.category, text)) {
|
|
1635
|
+
continue;
|
|
1636
|
+
}
|
|
1637
|
+
const { vectors } = await backend.encodeForStorage(text);
|
|
1638
|
+
const ndMin = nearDuplicateUserInsertMinScore(cfg);
|
|
1639
|
+
if (await db.hasHighlySimilarUserMemoryInSession(agentId, sessionKey, vectors, ndMin)) {
|
|
1412
1640
|
continue;
|
|
1413
1641
|
}
|
|
1414
1642
|
await storeOneCaptureItem(agentId, { category: e.category, text, importance: e.importance }, cfg, db, backend, {
|
|
@@ -1422,17 +1650,17 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1422
1650
|
// 1. Batch embed all new extractions
|
|
1423
1651
|
const embeddingResults: { item: LLMExtractionItem; vectors: number[][] }[] = [];
|
|
1424
1652
|
for (const item of userItems) {
|
|
1425
|
-
const truncated = truncateForCapture(item.text, cfg.captureMaxChars);
|
|
1653
|
+
const truncated = truncateForCapture(normalizePersonalMemoryTextForStore(item.text), cfg.captureMaxChars);
|
|
1426
1654
|
const { vectors } = await backend.encodeForStorage(truncated);
|
|
1427
1655
|
embeddingResults.push({ item: { ...item, text: truncated }, vectors });
|
|
1428
1656
|
}
|
|
1429
1657
|
|
|
1430
|
-
// 2. Recall top
|
|
1658
|
+
// 2. Recall top similar existing memories for ALL new extractions (agentId global, USER_MEMORY scope)
|
|
1431
1659
|
const allVectors = embeddingResults.flatMap((r) => r.vectors);
|
|
1432
1660
|
const recallMinScore = Math.max(0.5, cfg.similarityThresholdUserMemory - 0.35);
|
|
1433
1661
|
console.debug(`[openclaw-memory-alibaba-local] userImageExtraction recall: ${allVectors.length} query vectors, minScore=${recallMinScore}`);
|
|
1434
1662
|
const existingCandidates = allVectors.length > 0
|
|
1435
|
-
? await db.searchMerged(agentId, allVectors,
|
|
1663
|
+
? await db.searchMerged(agentId, allVectors, 40, recallMinScore, [...USER_MEMORY_CATEGORIES])
|
|
1436
1664
|
: [];
|
|
1437
1665
|
if (existingCandidates.length > 0) {
|
|
1438
1666
|
console.debug(`[openclaw-memory-alibaba-local] userImageExtraction recall found ${existingCandidates.length} candidates: ${existingCandidates.map((c) => `[${c.score.toFixed(3)}] ${c.entry.text.slice(0, 60)}`).join(" | ")}`);
|
|
@@ -1467,23 +1695,41 @@ async function captureUserMemoryFromInboundTexts(
|
|
|
1467
1695
|
continue;
|
|
1468
1696
|
}
|
|
1469
1697
|
|
|
1698
|
+
const memText = truncateForCapture(
|
|
1699
|
+
normalizePersonalMemoryTextForStore(action.text),
|
|
1700
|
+
cfg.captureMaxChars,
|
|
1701
|
+
);
|
|
1702
|
+
|
|
1703
|
+
if (action.action === "insert") {
|
|
1704
|
+
if (await existsUserSemanticDuplicateAnyVariant(db, agentId, sessionKey, action.category, memText)) {
|
|
1705
|
+
continue;
|
|
1706
|
+
}
|
|
1707
|
+
}
|
|
1708
|
+
|
|
1470
1709
|
if (action.action === "update") {
|
|
1471
1710
|
const hit = existingCandidates.find((c) => c.entry.id === action.memoryId);
|
|
1472
1711
|
if (hit) {
|
|
1473
1712
|
await deleteSimilarLogicalMemory(db, agentId, hit.entry.sessionId, hit);
|
|
1474
1713
|
}
|
|
1475
|
-
const { vectors } = await backend.encodeForStorage(
|
|
1714
|
+
const { vectors } = await backend.encodeForStorage(memText);
|
|
1476
1715
|
const rows = buildChunkRows(
|
|
1477
|
-
{ category: action.category, text:
|
|
1716
|
+
{ category: action.category, text: memText, importance: action.importance },
|
|
1478
1717
|
vectors,
|
|
1479
1718
|
{ userId, sessionId: sessionKey },
|
|
1480
1719
|
);
|
|
1481
1720
|
await db.storeMany(agentId, rows);
|
|
1482
1721
|
} else {
|
|
1483
1722
|
// insert
|
|
1484
|
-
const { vectors } = await backend.encodeForStorage(
|
|
1723
|
+
const { vectors } = await backend.encodeForStorage(memText);
|
|
1724
|
+
const ndMin = nearDuplicateUserInsertMinScore(cfg);
|
|
1725
|
+
if (await db.hasHighlySimilarUserMemoryInSession(agentId, sessionKey, vectors, ndMin)) {
|
|
1726
|
+
console.debug(
|
|
1727
|
+
`[openclaw-memory-alibaba-local] skip user insert (near-duplicate in session, minScore=${ndMin})`,
|
|
1728
|
+
);
|
|
1729
|
+
continue;
|
|
1730
|
+
}
|
|
1485
1731
|
const rows = buildChunkRows(
|
|
1486
|
-
{ category: action.category, text:
|
|
1732
|
+
{ category: action.category, text: memText, importance: action.importance },
|
|
1487
1733
|
vectors,
|
|
1488
1734
|
{ userId, sessionId: sessionKey },
|
|
1489
1735
|
);
|
|
@@ -1784,7 +2030,7 @@ const memoryPlugin = {
|
|
|
1784
2030
|
}
|
|
1785
2031
|
|
|
1786
2032
|
const getDbAndBackend = (): { db: MemoryDB; backend: EmbeddingBackend } | null =>
|
|
1787
|
-
backend ? { db, backend } : null;
|
|
2033
|
+
backend && db ? { db, backend } : null;
|
|
1788
2034
|
|
|
1789
2035
|
const memoryAdminOpts = backend
|
|
1790
2036
|
? {
|
|
@@ -2174,6 +2420,7 @@ const memoryPlugin = {
|
|
|
2174
2420
|
|
|
2175
2421
|
if (cfg.autoCapture) {
|
|
2176
2422
|
api.on("agent_end", async (event, ctx) => {
|
|
2423
|
+
console.log("[openclaw-memory-alibaba-local] smoke: agent_end hook invoked");
|
|
2177
2424
|
if (!db || !backend) {
|
|
2178
2425
|
return;
|
|
2179
2426
|
}
|
package/package.json
CHANGED
package/prompts.ts
CHANGED
|
@@ -33,6 +33,7 @@ Rewrite the input into clear, complete sentences and output memory entries.
|
|
|
33
33
|
3) TEMPORAL: Prefix every entry with a [date] tag — mandatory.
|
|
34
34
|
4) SUBJECT: Every entry must have an explicit subject after the date prefix.
|
|
35
35
|
5) NO SUBSUMPTION: No entry should be a subset of another entry in this batch. If one sentence already covers the information, do not emit a narrower duplicate.
|
|
36
|
+
6) **Single-message User facts**: When one user utterance states several **independent** User facts, still emit **one line per distinct topic**, but never two lines where one is a **clear subset** of the other — merge sub-facts into the broader line for that topic.
|
|
36
37
|
|
|
37
38
|
# Date Tag Rules
|
|
38
39
|
- Explicit absolute date -> [YYYY-MM-DD], [YYYY-MM], or [YYYY]
|
|
@@ -42,9 +43,12 @@ Rewrite the input into clear, complete sentences and output memory entries.
|
|
|
42
43
|
Do NOT convert relative dates to absolute. No entry may be undated.
|
|
43
44
|
|
|
44
45
|
# Subject Rules
|
|
45
|
-
- If the API caller's own information is explicitly stated (e.g., "I/my/me") -> use
|
|
46
|
+
- If the API caller's own information is explicitly stated (e.g., "I/my/me", Chinese 我/我的/本人) -> you MUST use an explicit subject:
|
|
47
|
+
- English entries: start the sentence (after the date tag) with **User** (e.g. "User's name is …", "User works in …").
|
|
48
|
+
- Chinese entries: start the sentence (after the date tag) with **用户** (e.g. "用户名字是…", "用户在杭州工作").
|
|
46
49
|
- Otherwise, use actual names/nicknames/roles as stated in the text.
|
|
47
50
|
- Do not infer that a named person is the API caller unless the text explicitly indicates it.
|
|
51
|
+
- **No cross-pipeline duplication**: If a fact is about the API caller, emit it ONLY as a User/用户-subject entry. Do NOT also emit a separate entry that restates the same fact as a third-party or generic sentence in the same batch.
|
|
48
52
|
|
|
49
53
|
# Sentence Rewrite Rules
|
|
50
54
|
- Clean up casual or fragmented language into well-formed statements.
|
|
@@ -64,8 +68,11 @@ Do NOT convert relative dates to absolute. No entry may be undated.
|
|
|
64
68
|
|
|
65
69
|
# Exclude
|
|
66
70
|
- Passwords, API keys, credentials
|
|
67
|
-
- Pure small talk ("Bye!", "Take care!")
|
|
68
|
-
- Conversational reactions that carry no new factual information (e.g., "That's great!", "Congrats!", "Sorry to hear that", "Thanks!", expressions of agreement or sympathy without new content)
|
|
71
|
+
- Pure small talk ("Bye!", "Take care!", isolated "你好" / "Hi" with no other content)
|
|
72
|
+
- Conversational reactions that carry no new factual information (e.g., "That's great!", "Congrats!", "Sorry to hear that", "Thanks!", expressions of agreement or sympathy without new content)
|
|
73
|
+
- **Standalone general knowledge** not tied to the User's life, plans, preferences, or identity (e.g. "Earth orbits the Sun in one year", "1+1=2", textbook facts) — omit entirely unless the User explicitly relates the fact to themselves
|
|
74
|
+
- **One-off task requests** with no durable personal stake (e.g. "check today's weather", "translate this sentence", "fix this bug", "generate a PPT outline") — omit unless they reveal lasting preferences or constraints
|
|
75
|
+
- **Pure technical Q&A** with no personal profile content (e.g. language syntax comparisons) — omit`;
|
|
69
76
|
|
|
70
77
|
export const MEMORY_EXTRACTION_FORMAT = `
|
|
71
78
|
|
|
@@ -201,13 +208,18 @@ Only INSERT or UPDATE information that reveals something lasting about the User:
|
|
|
201
208
|
- One-time commands or ephemeral task instructions (e.g. "User asked to run command X", "User requested to install Y")
|
|
202
209
|
- Meta-conversation actions (e.g. "User inquired about ...", "User cancelled ...")
|
|
203
210
|
- Generic observations with no personal relevance
|
|
204
|
-
- Information that is already fully covered by a Store item
|
|
211
|
+
- Information that is already fully covered by a Store item — **even if wording differs** (same fact → SKIP, do not INSERT another row)
|
|
212
|
+
- **Rephrasings** of the same fact already in Store (e.g. Store has "用户在杭州工作" and batch says "用户的工作地点是杭州" → SKIP)
|
|
213
|
+
- **Aggressive dedup**: If Store already contains the same fact with different wording, punctuation, or sentence order, you MUST SKIP — do not add another row. When in doubt between INSERT and SKIP for personal profile facts, prefer **SKIP**.
|
|
205
214
|
|
|
206
215
|
# Refinement Principles
|
|
207
216
|
1. **Prefer the richer version**: When a batch item and a Store item describe the same topic, keep whichever has the most information. If the batch adds new details, UPDATE to include them.
|
|
208
217
|
2. **High cohesion**: Only merge entries about the exact same specific topic. Entries about different topics stay separate.
|
|
209
218
|
3. **Multiple preferences coexist**: Different concrete items under the same category are NOT duplicates. For example, "User likes apples" and "User likes fish" are two separate preferences — INSERT both, do NOT UPDATE or DELETE one for the other. Only UPDATE/DELETE when the new item truly contradicts or refines the old one (e.g. "User no longer likes apples" replaces "User likes apples").
|
|
210
219
|
4. **Strip date prefixes**: Input text may contain [date] or [as of ...] prefixes — remove them from the output text. User profile memories are evergreen and should not carry temporal tags.
|
|
220
|
+
5. **Subject prefix (mandatory)**: Every INSERT/UPDATE output field "text" MUST begin with **User** (English) or **用户** (Chinese) — the same convention as in the extraction stage. Never output first-person "I/我" as the subject for profile facts.
|
|
221
|
+
6. **Contradictions (critical)**: If the batch **replaces or negates** a Store item on the **same habit or stance** (e.g. Store: User runs every morning / 用户晨跑; batch: User now hates running / 用户讨厌跑步 → **DELETE** the old habit and **INSERT** the new stance; do **not** keep both). Diet: "不吃辣" vs "能接受微辣" → **merge** into one current preference and **DELETE** outdated conflicting lines if needed. Relationship: "女朋友小美" vs "已分手" → **DELETE** girlfriend-as-current and **INSERT** breakup / ex status.
|
|
222
|
+
7. **Store hygiene**: If Store already has **multiple** rows that are clearly the **same repeating fact** (near-identical wording or partial duplicates), use **DELETE** on redundant ids and **UPDATE** one survivor — aim for **one row per stable fact topic** when the batch makes that obvious.
|
|
211
223
|
|
|
212
224
|
# Actions (one per batch index)
|
|
213
225
|
- **INSERT**: New lasting personal info not in Store.
|
|
@@ -273,6 +285,7 @@ Only INSERT or UPDATE information that captures a concrete, verifiable fact or e
|
|
|
273
285
|
3. **High cohesion**: Only merge entries about the exact same event or fact. Different events stay separate even if related.
|
|
274
286
|
4. **Multiple items coexist**: Different concrete items under the same category are NOT duplicates. For example, "likes apples" and "likes fish" are two separate facts — INSERT both. Only DELETE when the new item truly contradicts the old one (e.g. a corrected outcome).
|
|
275
287
|
5. **Contradiction = replace**: If a batch item directly contradicts a Store item (e.g. different outcome), DELETE the old item and INSERT the new one.
|
|
288
|
+
6. **Re-ingestion / replay**: If the batch is essentially the **same article or chat** already captured in Store (user pasted it again), prefer **SKIP** or **UPDATE** to enrich — avoid a second nearly identical INSERT when meaning is unchanged.
|
|
276
289
|
|
|
277
290
|
# Actions (one per batch index)
|
|
278
291
|
- **INSERT**: New world fact not in Store.
|