openclaw-memory-alibaba-local 1.0.14 → 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/db.ts +55 -0
  2. package/index.ts +277 -30
  3. package/package.json +1 -1
  4. package/prompts.ts +17 -4
package/db.ts CHANGED
@@ -1201,6 +1201,61 @@ export class MemoryDB {
1201
1201
  return rows.length > 0;
1202
1202
  }
1203
1203
 
1204
+ /**
1205
+ * Whether this agent+session already has user-memory rows whose embeddings are ≥ minScore similar to the query.
1206
+ * Used to suppress near-duplicate inserts when the userImage LLM issues insert instead of skip/update.
1207
+ */
1208
+ async hasHighlySimilarUserMemoryInSession(
1209
+ agentId: string,
1210
+ sessionId: string,
1211
+ vectors: number[][],
1212
+ minScore: number,
1213
+ ): Promise<boolean> {
1214
+ if (vectors.length === 0) {
1215
+ return false;
1216
+ }
1217
+ await this.ensureInitialized();
1218
+ await this.refreshToLatest();
1219
+ const hits = await this.searchMerged(agentId, vectors, 28, minScore, [...USER_MEMORY_CATEGORIES]);
1220
+ const sid = normSessionId(sessionId);
1221
+ for (const h of hits) {
1222
+ if (normSessionId(h.entry.sessionId) !== sid) {
1223
+ continue;
1224
+ }
1225
+ if (h.score >= minScore) {
1226
+ return true;
1227
+ }
1228
+ }
1229
+ return false;
1230
+ }
1231
+
1232
+ /**
1233
+ * Same as {@link hasHighlySimilarUserMemoryInSession} but scoped to `world_fact` rows (session-local dedup on re-send).
1234
+ */
1235
+ async hasHighlySimilarWorldFactInSession(
1236
+ agentId: string,
1237
+ sessionId: string,
1238
+ vectors: number[][],
1239
+ minScore: number,
1240
+ ): Promise<boolean> {
1241
+ if (vectors.length === 0) {
1242
+ return false;
1243
+ }
1244
+ await this.ensureInitialized();
1245
+ await this.refreshToLatest();
1246
+ const hits = await this.searchMerged(agentId, vectors, 28, minScore, [WORLD_FACT]);
1247
+ const sid = normSessionId(sessionId);
1248
+ for (const h of hits) {
1249
+ if (normSessionId(h.entry.sessionId) !== sid) {
1250
+ continue;
1251
+ }
1252
+ if (h.score >= minScore) {
1253
+ return true;
1254
+ }
1255
+ }
1256
+ return false;
1257
+ }
1258
+
1204
1259
  /**
1205
1260
  * Vector search with multiple query embeddings; merge by category + text (chunk 行共享同一逻辑正文), keep max score.
1206
1261
  */
package/index.ts CHANGED
@@ -525,8 +525,8 @@ async function extractUserMemoriesWithLLM(
525
525
  for (const item of list) {
526
526
  const text = typeof item.text === "string" ? item.text.trim() : "";
527
527
  if (text.length >= 10 && text.length <= 2000) {
528
- // importance: 0.7 if text mentions User, 0.5 otherwise
529
- const importance = /\bUser\b/.test(text) ? 0.7 : 0.5;
528
+ // importance: 0.7 if text mentions User / 用户, 0.5 otherwise
529
+ const importance = /\bUser\b|用户/.test(text) ? 0.7 : 0.5;
530
530
  out.push({ category: USER_MEMORY_FACT, text, importance });
531
531
  }
532
532
  }
@@ -1175,16 +1175,6 @@ async function runAgentEndCapture(
1175
1175
 
1176
1176
  console.debug(`[openclaw-memory-alibaba-local] agentEndCapture fullRows=${fullRows.length} userTexts=${userRawTexts.length} uaLines=${uaLines.length}`);
1177
1177
 
1178
- // Save cursor BEFORE extraction so that a failure in extraction/storage
1179
- // does not leave the cursor un-advanced, which would cause duplicate
1180
- // full_context rows on retry.
1181
- map[key] = {
1182
- version: 2,
1183
- roleCounts: { ...running },
1184
- lastMessagesLength: messages.length,
1185
- };
1186
- saveAgentEndCursorMap(lancedbDir, map);
1187
-
1188
1178
  if (fullRows.length > 0) {
1189
1179
  await db.storeMany(
1190
1180
  agentId,
@@ -1208,6 +1198,205 @@ async function runAgentEndCapture(
1208
1198
  captureUserMemoryFromInboundTexts(cfg, db, backend, agentId, sid, userId, userRawTexts),
1209
1199
  captureSelfImprovingFromLines(cfg, db, backend, agentId, sid, userId, uaLines),
1210
1200
  ]);
1201
+
1202
+ map[key] = {
1203
+ version: 2,
1204
+ roleCounts: { ...running },
1205
+ lastMessagesLength: messages.length,
1206
+ };
1207
+ saveAgentEndCursorMap(lancedbDir, map);
1208
+ }
1209
+
1210
+ /** Strip leading bracketed date tags (e.g. [YYYY-MM-DD], [as of …]) from unified extraction lines. */
1211
+ function stripExtractionDatePrefixes(text: string): string {
1212
+ let t = text.trim();
1213
+ const datedPrefix = /^\[[^\]]*\]\s*/u;
1214
+ while (datedPrefix.test(t)) {
1215
+ t = t.replace(datedPrefix, "").trim();
1216
+ }
1217
+ return t;
1218
+ }
1219
+
1220
+ /** Normalize extraction text for comparing whether world vs user items state the same fact (same `agent_end` batch). */
1221
+ function normalizeCoreForPersonalWorldDedup(text: string): string {
1222
+ return stripExtractionDatePrefixes(text)
1223
+ .replace(/\bUser\b/gi, "")
1224
+ .replace(/用户/g, "")
1225
+ .replace(/\s+/g, "")
1226
+ .toLowerCase();
1227
+ }
1228
+
1229
+ /**
1230
+ * True if two extractions likely duplicate the same fact across personal vs world routing.
1231
+ * Conservative: avoids dropping distinct world facts that merely mention similar words.
1232
+ */
1233
+ function personalWorldCoreOverlaps(a: string, b: string): boolean {
1234
+ const ca = normalizeCoreForPersonalWorldDedup(a);
1235
+ const cb = normalizeCoreForPersonalWorldDedup(b);
1236
+ const minLen = 12;
1237
+ if (ca.length < minLen || cb.length < minLen) {
1238
+ return false;
1239
+ }
1240
+ if (ca.includes(cb) || cb.includes(ca)) {
1241
+ return true;
1242
+ }
1243
+ const sa = new Set([...ca]);
1244
+ let inter = 0;
1245
+ for (const ch of cb) {
1246
+ if (sa.has(ch)) inter++;
1247
+ }
1248
+ const ratio = inter / Math.min(ca.length, cb.length);
1249
+ return ratio > 0.55;
1250
+ }
1251
+
1252
+ /**
1253
+ * Same-turn / same-extractor-call: drop subset-redundant lines (keep longer) before user/world image LLM.
1254
+ * Uses normalized cores; minCoreLen avoids over-merging short fragments.
1255
+ */
1256
+ function dedupeExtractionBatchBySubstringContainment(
1257
+ items: LLMExtractionItem[],
1258
+ minCoreLen: number,
1259
+ ): LLMExtractionItem[] {
1260
+ const out: LLMExtractionItem[] = [];
1261
+ for (const item of items) {
1262
+ const ca = normalizeCoreForPersonalWorldDedup(item.text);
1263
+ if (ca.length < minCoreLen) {
1264
+ out.push(item);
1265
+ continue;
1266
+ }
1267
+ let handled = false;
1268
+ for (let i = 0; i < out.length; i++) {
1269
+ const prev = out[i]!;
1270
+ const cb = normalizeCoreForPersonalWorldDedup(prev.text);
1271
+ if (cb.length < minCoreLen) {
1272
+ continue;
1273
+ }
1274
+ if (ca.includes(cb) || cb.includes(ca)) {
1275
+ out[i] = item.text.length >= prev.text.length ? item : prev;
1276
+ handled = true;
1277
+ break;
1278
+ }
1279
+ }
1280
+ if (!handled) {
1281
+ out.push(item);
1282
+ }
1283
+ }
1284
+ return out;
1285
+ }
1286
+
1287
+ function filterWorldItemsAgainstUserItems(
1288
+ eventItems: LLMExtractionItem[],
1289
+ userItems: LLMExtractionItem[],
1290
+ ): LLMExtractionItem[] {
1291
+ if (userItems.length === 0 || eventItems.length === 0) {
1292
+ return eventItems;
1293
+ }
1294
+ return eventItems.filter(
1295
+ (e) => !userItems.some((u) => personalWorldCoreOverlaps(e.text, u.text)),
1296
+ );
1297
+ }
1298
+
1299
+ /**
1300
+ * Hard-drop lines that the iteration guide expects as empty (trivia, one-shot tasks, pure greetings).
1301
+ * Complements the LLM prompt; blocks mistaken world_fact rows like generic astronomy facts.
1302
+ */
1303
+ function isNoiseExtractionText(text: string): boolean {
1304
+ const core = stripExtractionDatePrefixes(text).trim();
1305
+ if (!core) {
1306
+ return true;
1307
+ }
1308
+ if (/^(你好|您好|hi|hello|hey)\b[!!。.??\s]*$/iu.test(core)) {
1309
+ return true;
1310
+ }
1311
+ if (!/\bUser\b|用户/.test(core)) {
1312
+ if (/地球围绕太阳|地球绕着太阳|围着太阳转|绕太阳/u.test(core)) {
1313
+ return true;
1314
+ }
1315
+ if (/^1\s*\+\s*1\b/u.test(core)) {
1316
+ return true;
1317
+ }
1318
+ if (/python.*list.*tuple|list\s+and\s+tuple.*(区别|difference)/iu.test(core)) {
1319
+ return true;
1320
+ }
1321
+ if (/(帮我)?查一下.*天气|check.*weather/u.test(core)) {
1322
+ return true;
1323
+ }
1324
+ if (/翻译一下这句话|translate\s+(this|the\s+sentence)/iu.test(core)) {
1325
+ return true;
1326
+ }
1327
+ if (/帮我把这段代码|bug\s*修|生成.*ppt|ppt\s*大纲|outline\s*for\s*a\s*ppt/iu.test(core)) {
1328
+ return true;
1329
+ }
1330
+ }
1331
+ return false;
1332
+ }
1333
+
1334
+ function filterNoiseExtractions(items: LLMExtractionItem[]): LLMExtractionItem[] {
1335
+ return items.filter((item) => !isNoiseExtractionText(item.text));
1336
+ }
1337
+
1338
+ /** Min cosine-similarity (0–1) for treating a candidate insert as a near-duplicate of existing user memory in the same session. */
1339
+ function nearDuplicateUserInsertMinScore(cfg: MemoryConfig): number {
1340
+ return Math.min(0.91, Math.max(0.78, cfg.similarityThresholdUserMemory + 0.28));
1341
+ }
1342
+
1343
+ function nearDuplicateWorldInsertMinScore(cfg: MemoryConfig): number {
1344
+ return Math.min(0.91, Math.max(0.82, cfg.similarityThresholdUserMemory + 0.22));
1345
+ }
1346
+
1347
+ /** Treat text with/without trailing CJK/Latin sentence punctuation as the same for exact dedup. */
1348
+ function userMemoryTextDedupVariants(text: string): string[] {
1349
+ const t = text.trim();
1350
+ const stripped = t.replace(/[。..!!??;;]+$/u, "").trim();
1351
+ if (stripped.length === 0) {
1352
+ return [t];
1353
+ }
1354
+ if (stripped === t) {
1355
+ return [t];
1356
+ }
1357
+ return [t, stripped];
1358
+ }
1359
+
1360
+ async function existsUserSemanticDuplicateAnyVariant(
1361
+ db: MemoryDB,
1362
+ agentId: string,
1363
+ sessionKey: string,
1364
+ category: UserMemoryCategory,
1365
+ memText: string,
1366
+ ): Promise<boolean> {
1367
+ for (const v of userMemoryTextDedupVariants(memText)) {
1368
+ if (await db.existsSemanticDuplicate(agentId, sessionKey, category, v)) {
1369
+ return true;
1370
+ }
1371
+ }
1372
+ return false;
1373
+ }
1374
+
1375
+ /**
1376
+ * Personal memories must not carry date tags; must start with `User` or `用户` per iteration guide.
1377
+ */
1378
+ function normalizePersonalMemoryTextForStore(text: string): string {
1379
+ let t = stripExtractionDatePrefixes(text).trim();
1380
+ if (typeof t.normalize === "function") {
1381
+ t = t.normalize("NFC");
1382
+ }
1383
+ t = t.replace(/\u00a0/g, " ").replace(/\s+/g, " ").trim();
1384
+ if (t.length < 2) {
1385
+ return t;
1386
+ }
1387
+ if (/^User\b/u.test(t) || /^用户/u.test(t)) {
1388
+ return t;
1389
+ }
1390
+ const hasCjk = /\p{Script=Han}/u.test(t);
1391
+ if (hasCjk) {
1392
+ const body = t.replace(/^我(的)?/u, "").trim();
1393
+ if (body.length === 0) {
1394
+ return `用户${t}`;
1395
+ }
1396
+ const joiner = /^[,。!?、:]/.test(body) ? "" : "";
1397
+ return `用户${joiner}${body}`;
1398
+ }
1399
+ return `User ${t}`;
1211
1400
  }
1212
1401
 
1213
1402
  /** User memory from raw user message texts (agent_end user delta). */
@@ -1257,7 +1446,7 @@ async function captureUserMemoryFromInboundTexts(
1257
1446
  const toSend = texts.filter((t) => t.length >= 5 && t.length <= cfg.captureMaxChars);
1258
1447
  if (toSend.length === 0) return;
1259
1448
 
1260
- const extractions = await extractUserMemoriesWithLLM(
1449
+ let extractions = await extractUserMemoriesWithLLM(
1261
1450
  cfg.llm,
1262
1451
  toSend,
1263
1452
  MAX_AUTO_CAPTURE_LLM,
@@ -1267,9 +1456,12 @@ async function captureUserMemoryFromInboundTexts(
1267
1456
  });
1268
1457
  if (extractions.length === 0) return;
1269
1458
 
1459
+ extractions = filterNoiseExtractions(extractions);
1460
+ if (extractions.length === 0) return;
1461
+
1270
1462
  // ---- Always split: User-related items vs event items ----
1271
- const userItems: LLMExtractionItem[] = [];
1272
- const eventItems: LLMExtractionItem[] = [];
1463
+ let userItems: LLMExtractionItem[] = [];
1464
+ let eventItems: LLMExtractionItem[] = [];
1273
1465
  // Match both English "User" and Chinese "用户" to correctly route user preferences
1274
1466
  const USER_SUBJECT_RE = /\bUser\b|用户/;
1275
1467
  for (const item of extractions) {
@@ -1280,6 +1472,13 @@ async function captureUserMemoryFromInboundTexts(
1280
1472
  }
1281
1473
  }
1282
1474
 
1475
+ // Same `agent_end` batch: do not send facts to world that duplicate user-route extractions
1476
+ eventItems = filterWorldItemsAgainstUserItems(eventItems, userItems);
1477
+
1478
+ // Collapse subset-redundant lines from a single extraction pass (reduces userImage / worldImage churn)
1479
+ userItems = dedupeExtractionBatchBySubstringContainment(userItems, 14);
1480
+ eventItems = dedupeExtractionBatchBySubstringContainment(eventItems, 22);
1481
+
1283
1482
  // ---- Parallel: event-item pipeline & user-item pipeline ----
1284
1483
  const eventPipeline = async () => {
1285
1484
  if (eventItems.length === 0) return;
@@ -1291,6 +1490,18 @@ async function captureUserMemoryFromInboundTexts(
1291
1490
  if (await db.existsSemanticDuplicate(agentId, sessionKey, e.category, text)) {
1292
1491
  continue;
1293
1492
  }
1493
+ const { vectors } = await backend.encodeForStorage(text);
1494
+ if (
1495
+ isWorldFact(e.category) &&
1496
+ (await db.hasHighlySimilarWorldFactInSession(
1497
+ agentId,
1498
+ sessionKey,
1499
+ vectors,
1500
+ nearDuplicateWorldInsertMinScore(cfg),
1501
+ ))
1502
+ ) {
1503
+ continue;
1504
+ }
1294
1505
  await storeOneCaptureItem(agentId, { category: e.category, text, importance: e.importance }, cfg, db, backend, {
1295
1506
  userId,
1296
1507
  sessionId: sessionKey,
@@ -1311,7 +1522,7 @@ async function captureUserMemoryFromInboundTexts(
1311
1522
 
1312
1523
  // 2. Per-item recall: for each event item recall top-3 similar existing world_facts, then dedup
1313
1524
  const recallMinScore = Math.max(0.5, cfg.similarityThresholdUserMemory - 0.35);
1314
- const PER_ITEM_RECALL = 3;
1525
+ const PER_ITEM_RECALL = 10;
1315
1526
  const candidateMap = new Map<string, MemorySearchResult>();
1316
1527
  for (const er of embeddingResults) {
1317
1528
  const perItemHits = er.vectors.length > 0
@@ -1361,15 +1572,17 @@ async function captureUserMemoryFromInboundTexts(
1361
1572
  continue;
1362
1573
  }
1363
1574
 
1575
+ const worldText = truncateForCapture(action.text, cfg.captureMaxChars);
1576
+
1364
1577
  if (action.action === "update") {
1365
1578
  const hit = existingCandidates.find((c) => c.entry.id === action.memoryId);
1366
1579
  if (hit) {
1367
1580
  await deleteSimilarLogicalMemory(db, agentId, hit.entry.sessionId, hit);
1368
1581
  deleteCount++;
1369
1582
  }
1370
- const { vectors } = await backend.encodeForStorage(action.text);
1583
+ const { vectors } = await backend.encodeForStorage(worldText);
1371
1584
  const rows = buildChunkRows(
1372
- { category: WORLD_FACT as MemoryCategory, text: action.text, importance: action.importance },
1585
+ { category: WORLD_FACT as MemoryCategory, text: worldText, importance: action.importance },
1373
1586
  vectors,
1374
1587
  { userId, sessionId: sessionKey },
1375
1588
  );
@@ -1377,9 +1590,19 @@ async function captureUserMemoryFromInboundTexts(
1377
1590
  insertCount++;
1378
1591
  } else {
1379
1592
  // insert
1380
- const { vectors } = await backend.encodeForStorage(action.text);
1593
+ if (await db.existsSemanticDuplicate(agentId, sessionKey, WORLD_FACT, worldText)) {
1594
+ continue;
1595
+ }
1596
+ const { vectors } = await backend.encodeForStorage(worldText);
1597
+ const wMin = nearDuplicateWorldInsertMinScore(cfg);
1598
+ if (await db.hasHighlySimilarWorldFactInSession(agentId, sessionKey, vectors, wMin)) {
1599
+ console.debug(
1600
+ `[openclaw-memory-alibaba-local] skip world insert (near-duplicate in session, minScore=${wMin})`,
1601
+ );
1602
+ continue;
1603
+ }
1381
1604
  const rows = buildChunkRows(
1382
- { category: WORLD_FACT as MemoryCategory, text: action.text, importance: action.importance },
1605
+ { category: WORLD_FACT as MemoryCategory, text: worldText, importance: action.importance },
1383
1606
  vectors,
1384
1607
  { userId, sessionId: sessionKey },
1385
1608
  );
@@ -1407,8 +1630,13 @@ async function captureUserMemoryFromInboundTexts(
1407
1630
  if (!cfg.llm) {
1408
1631
  console.warn(`[openclaw-memory-alibaba-local] no LLM configured, user items bypass UserImageExtraction`);
1409
1632
  for (const e of userItems) {
1410
- const text = truncateForCapture(e.text, cfg.captureMaxChars);
1411
- if (await db.existsSemanticDuplicate(agentId, sessionKey, e.category, text)) {
1633
+ const text = truncateForCapture(normalizePersonalMemoryTextForStore(e.text), cfg.captureMaxChars);
1634
+ if (await existsUserSemanticDuplicateAnyVariant(db, agentId, sessionKey, e.category, text)) {
1635
+ continue;
1636
+ }
1637
+ const { vectors } = await backend.encodeForStorage(text);
1638
+ const ndMin = nearDuplicateUserInsertMinScore(cfg);
1639
+ if (await db.hasHighlySimilarUserMemoryInSession(agentId, sessionKey, vectors, ndMin)) {
1412
1640
  continue;
1413
1641
  }
1414
1642
  await storeOneCaptureItem(agentId, { category: e.category, text, importance: e.importance }, cfg, db, backend, {
@@ -1422,17 +1650,17 @@ async function captureUserMemoryFromInboundTexts(
1422
1650
  // 1. Batch embed all new extractions
1423
1651
  const embeddingResults: { item: LLMExtractionItem; vectors: number[][] }[] = [];
1424
1652
  for (const item of userItems) {
1425
- const truncated = truncateForCapture(item.text, cfg.captureMaxChars);
1653
+ const truncated = truncateForCapture(normalizePersonalMemoryTextForStore(item.text), cfg.captureMaxChars);
1426
1654
  const { vectors } = await backend.encodeForStorage(truncated);
1427
1655
  embeddingResults.push({ item: { ...item, text: truncated }, vectors });
1428
1656
  }
1429
1657
 
1430
- // 2. Recall top-10 similar existing memories for ALL new extractions (agentId global, USER_MEMORY scope)
1658
+ // 2. Recall top similar existing memories for ALL new extractions (agentId global, USER_MEMORY scope)
1431
1659
  const allVectors = embeddingResults.flatMap((r) => r.vectors);
1432
1660
  const recallMinScore = Math.max(0.5, cfg.similarityThresholdUserMemory - 0.35);
1433
1661
  console.debug(`[openclaw-memory-alibaba-local] userImageExtraction recall: ${allVectors.length} query vectors, minScore=${recallMinScore}`);
1434
1662
  const existingCandidates = allVectors.length > 0
1435
- ? await db.searchMerged(agentId, allVectors, 10, recallMinScore, [...USER_MEMORY_CATEGORIES])
1663
+ ? await db.searchMerged(agentId, allVectors, 40, recallMinScore, [...USER_MEMORY_CATEGORIES])
1436
1664
  : [];
1437
1665
  if (existingCandidates.length > 0) {
1438
1666
  console.debug(`[openclaw-memory-alibaba-local] userImageExtraction recall found ${existingCandidates.length} candidates: ${existingCandidates.map((c) => `[${c.score.toFixed(3)}] ${c.entry.text.slice(0, 60)}`).join(" | ")}`);
@@ -1467,23 +1695,41 @@ async function captureUserMemoryFromInboundTexts(
1467
1695
  continue;
1468
1696
  }
1469
1697
 
1698
+ const memText = truncateForCapture(
1699
+ normalizePersonalMemoryTextForStore(action.text),
1700
+ cfg.captureMaxChars,
1701
+ );
1702
+
1703
+ if (action.action === "insert") {
1704
+ if (await existsUserSemanticDuplicateAnyVariant(db, agentId, sessionKey, action.category, memText)) {
1705
+ continue;
1706
+ }
1707
+ }
1708
+
1470
1709
  if (action.action === "update") {
1471
1710
  const hit = existingCandidates.find((c) => c.entry.id === action.memoryId);
1472
1711
  if (hit) {
1473
1712
  await deleteSimilarLogicalMemory(db, agentId, hit.entry.sessionId, hit);
1474
1713
  }
1475
- const { vectors } = await backend.encodeForStorage(action.text);
1714
+ const { vectors } = await backend.encodeForStorage(memText);
1476
1715
  const rows = buildChunkRows(
1477
- { category: action.category, text: action.text, importance: action.importance },
1716
+ { category: action.category, text: memText, importance: action.importance },
1478
1717
  vectors,
1479
1718
  { userId, sessionId: sessionKey },
1480
1719
  );
1481
1720
  await db.storeMany(agentId, rows);
1482
1721
  } else {
1483
1722
  // insert
1484
- const { vectors } = await backend.encodeForStorage(action.text);
1723
+ const { vectors } = await backend.encodeForStorage(memText);
1724
+ const ndMin = nearDuplicateUserInsertMinScore(cfg);
1725
+ if (await db.hasHighlySimilarUserMemoryInSession(agentId, sessionKey, vectors, ndMin)) {
1726
+ console.debug(
1727
+ `[openclaw-memory-alibaba-local] skip user insert (near-duplicate in session, minScore=${ndMin})`,
1728
+ );
1729
+ continue;
1730
+ }
1485
1731
  const rows = buildChunkRows(
1486
- { category: action.category, text: action.text, importance: action.importance },
1732
+ { category: action.category, text: memText, importance: action.importance },
1487
1733
  vectors,
1488
1734
  { userId, sessionId: sessionKey },
1489
1735
  );
@@ -1784,7 +2030,7 @@ const memoryPlugin = {
1784
2030
  }
1785
2031
 
1786
2032
  const getDbAndBackend = (): { db: MemoryDB; backend: EmbeddingBackend } | null =>
1787
- backend ? { db, backend } : null;
2033
+ backend && db ? { db, backend } : null;
1788
2034
 
1789
2035
  const memoryAdminOpts = backend
1790
2036
  ? {
@@ -2174,6 +2420,7 @@ const memoryPlugin = {
2174
2420
 
2175
2421
  if (cfg.autoCapture) {
2176
2422
  api.on("agent_end", async (event, ctx) => {
2423
+ console.log("[openclaw-memory-alibaba-local] smoke: agent_end hook invoked");
2177
2424
  if (!db || !backend) {
2178
2425
  return;
2179
2426
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "openclaw-memory-alibaba-local",
3
- "version": "1.0.14",
3
+ "version": "1.0.15",
4
4
  "description": "OpenClaw memory plugin: local LanceDB + DashScope-compatible embeddings",
5
5
  "type": "module",
6
6
  "engines": {
package/prompts.ts CHANGED
@@ -33,6 +33,7 @@ Rewrite the input into clear, complete sentences and output memory entries.
33
33
  3) TEMPORAL: Prefix every entry with a [date] tag — mandatory.
34
34
  4) SUBJECT: Every entry must have an explicit subject after the date prefix.
35
35
  5) NO SUBSUMPTION: No entry should be a subset of another entry in this batch. If one sentence already covers the information, do not emit a narrower duplicate.
36
+ 6) **Single-message User facts**: When one user utterance states several **independent** User facts, still emit **one line per distinct topic**, but never two lines where one is a **clear subset** of the other — merge sub-facts into the broader line for that topic.
36
37
 
37
38
  # Date Tag Rules
38
39
  - Explicit absolute date -> [YYYY-MM-DD], [YYYY-MM], or [YYYY]
@@ -42,9 +43,12 @@ Rewrite the input into clear, complete sentences and output memory entries.
42
43
  Do NOT convert relative dates to absolute. No entry may be undated.
43
44
 
44
45
  # Subject Rules
45
- - If the API caller's own information is explicitly stated (e.g., "I/my/me") -> use "User" as subject.
46
+ - If the API caller's own information is explicitly stated (e.g., "I/my/me", Chinese 我/我的/本人) -> you MUST use an explicit subject:
47
+ - English entries: start the sentence (after the date tag) with **User** (e.g. "User's name is …", "User works in …").
48
+ - Chinese entries: start the sentence (after the date tag) with **用户** (e.g. "用户名字是…", "用户在杭州工作").
46
49
  - Otherwise, use actual names/nicknames/roles as stated in the text.
47
50
  - Do not infer that a named person is the API caller unless the text explicitly indicates it.
51
+ - **No cross-pipeline duplication**: If a fact is about the API caller, emit it ONLY as a User/用户-subject entry. Do NOT also emit a separate entry that restates the same fact as a third-party or generic sentence in the same batch.
48
52
 
49
53
  # Sentence Rewrite Rules
50
54
  - Clean up casual or fragmented language into well-formed statements.
@@ -64,8 +68,11 @@ Do NOT convert relative dates to absolute. No entry may be undated.
64
68
 
65
69
  # Exclude
66
70
  - Passwords, API keys, credentials
67
- - Pure small talk ("Bye!", "Take care!")
68
- - Conversational reactions that carry no new factual information (e.g., "That's great!", "Congrats!", "Sorry to hear that", "Thanks!", expressions of agreement or sympathy without new content)`;
71
+ - Pure small talk ("Bye!", "Take care!", isolated "你好" / "Hi" with no other content)
72
+ - Conversational reactions that carry no new factual information (e.g., "That's great!", "Congrats!", "Sorry to hear that", "Thanks!", expressions of agreement or sympathy without new content)
73
+ - **Standalone general knowledge** not tied to the User's life, plans, preferences, or identity (e.g. "Earth orbits the Sun in one year", "1+1=2", textbook facts) — omit entirely unless the User explicitly relates the fact to themselves
74
+ - **One-off task requests** with no durable personal stake (e.g. "check today's weather", "translate this sentence", "fix this bug", "generate a PPT outline") — omit unless they reveal lasting preferences or constraints
75
+ - **Pure technical Q&A** with no personal profile content (e.g. language syntax comparisons) — omit`;
69
76
 
70
77
  export const MEMORY_EXTRACTION_FORMAT = `
71
78
 
@@ -201,13 +208,18 @@ Only INSERT or UPDATE information that reveals something lasting about the User:
201
208
  - One-time commands or ephemeral task instructions (e.g. "User asked to run command X", "User requested to install Y")
202
209
  - Meta-conversation actions (e.g. "User inquired about ...", "User cancelled ...")
203
210
  - Generic observations with no personal relevance
204
- - Information that is already fully covered by a Store item
211
+ - Information that is already fully covered by a Store item — **even if wording differs** (same fact → SKIP, do not INSERT another row)
212
+ - **Rephrasings** of the same fact already in Store (e.g. Store has "用户在杭州工作" and batch says "用户的工作地点是杭州" → SKIP)
213
+ - **Aggressive dedup**: If Store already contains the same fact with different wording, punctuation, or sentence order, you MUST SKIP — do not add another row. When in doubt between INSERT and SKIP for personal profile facts, prefer **SKIP**.
205
214
 
206
215
  # Refinement Principles
207
216
  1. **Prefer the richer version**: When a batch item and a Store item describe the same topic, keep whichever has the most information. If the batch adds new details, UPDATE to include them.
208
217
  2. **High cohesion**: Only merge entries about the exact same specific topic. Entries about different topics stay separate.
209
218
  3. **Multiple preferences coexist**: Different concrete items under the same category are NOT duplicates. For example, "User likes apples" and "User likes fish" are two separate preferences — INSERT both, do NOT UPDATE or DELETE one for the other. Only UPDATE/DELETE when the new item truly contradicts or refines the old one (e.g. "User no longer likes apples" replaces "User likes apples").
210
219
  4. **Strip date prefixes**: Input text may contain [date] or [as of ...] prefixes — remove them from the output text. User profile memories are evergreen and should not carry temporal tags.
220
+ 5. **Subject prefix (mandatory)**: Every INSERT/UPDATE output field "text" MUST begin with **User** (English) or **用户** (Chinese) — the same convention as in the extraction stage. Never output first-person "I/我" as the subject for profile facts.
221
+ 6. **Contradictions (critical)**: If the batch **replaces or negates** a Store item on the **same habit or stance** (e.g. Store: User runs every morning / 用户晨跑; batch: User now hates running / 用户讨厌跑步 → **DELETE** the old habit and **INSERT** the new stance; do **not** keep both). Diet: "不吃辣" vs "能接受微辣" → **merge** into one current preference and **DELETE** outdated conflicting lines if needed. Relationship: "女朋友小美" vs "已分手" → **DELETE** girlfriend-as-current and **INSERT** breakup / ex status.
222
+ 7. **Store hygiene**: If Store already has **multiple** rows that are clearly the **same repeating fact** (near-identical wording or partial duplicates), use **DELETE** on redundant ids and **UPDATE** one survivor — aim for **one row per stable fact topic** when the batch makes that obvious.
211
223
 
212
224
  # Actions (one per batch index)
213
225
  - **INSERT**: New lasting personal info not in Store.
@@ -273,6 +285,7 @@ Only INSERT or UPDATE information that captures a concrete, verifiable fact or e
273
285
  3. **High cohesion**: Only merge entries about the exact same event or fact. Different events stay separate even if related.
274
286
  4. **Multiple items coexist**: Different concrete items under the same category are NOT duplicates. For example, "likes apples" and "likes fish" are two separate facts — INSERT both. Only DELETE when the new item truly contradicts the old one (e.g. a corrected outcome).
275
287
  5. **Contradiction = replace**: If a batch item directly contradicts a Store item (e.g. different outcome), DELETE the old item and INSERT the new one.
288
+ 6. **Re-ingestion / replay**: If the batch is essentially the **same article or chat** already captured in Store (user pasted it again), prefer **SKIP** or **UPDATE** to enrich — avoid a second nearly identical INSERT when meaning is unchanged.
276
289
 
277
290
  # Actions (one per batch index)
278
291
  - **INSERT**: New world fact not in Store.