npm - openclaw-memory-alibaba-local - Versions diffs - 1.0.4 → 1.0.6 - Mend

openclaw-memory-alibaba-local 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/db.ts CHANGED Viewed

@@ -1400,7 +1400,7 @@ export class MemoryDB {
       try {
         const eid = sqlEscapeLiteral(id);
         await (this.table! as any).update(
-          { createdAt: now },
+          { createdAt: String(now) },
           { where: `id = '${eid}' AND agentId = '${a}' AND category = '${wf}'` },
         );
       } catch (err) {

package/index.ts CHANGED Viewed

@@ -63,6 +63,7 @@ import type { MemoryEntry, MemorySearchResult } from "./db.js";
 import {
   buildMemoryExtractionPrompt,
   buildUserImageExtractionPrompt,
+  buildWorldImageExtractionPrompt,
   SELF_IMPROVING_EXTRACTION_INSTRUCTIONS,
 } from "./prompts.js";
 import { extractUserQueryForRecall, stripForLogicalMemoryExtraction } from "./prompt-strip.js";
@@ -782,6 +783,119 @@ async function extractUserImageWithLLM(
   }
 }
+// ---------------------------------------------------------------------------
+// World image extraction: recall + LLM CRUD for world facts
+// ---------------------------------------------------------------------------
+type WorldImageAction = // one reconciliation step produced by extractWorldImageWithLLM, discriminated by `action`
+  | { action: "insert"; text: string; importance: number } // store `text` as a new world-fact memory
+  | { action: "update"; memoryId: string; text: string; importance: number } // replace existing memory `memoryId` with the merged `text`
+  | { action: "delete"; memoryId: string } // remove existing memory `memoryId`
+  | { action: "skip" }; // store nothing for this batch item
+/**
+ * World image extraction: given N new world-fact extractions + M existing
+ * similar world-fact memories, ask the LLM to decide insert / update / skip / delete for each.
+ *
+ * Every batch index defaults to a plain insert of the extracted item, so an index the
+ * LLM omits (or answers with an unusable action) is still stored. `delete` actions are
+ * tracked separately from the index's primary action (insert / update / skip), so a
+ * contradiction reported as delete + insert is honoured regardless of the order in
+ * which the LLM emits the two entries.
+ *
+ * @param llmConfig - API key, base URL and model used for the chat completion.
+ * @param newItems - Newly extracted world facts; the array position is the batch index.
+ * @param existingCandidates - Recalled memories; only their ids are accepted as update/delete targets.
+ * @returns Actions ordered by batch index, with the deletes of an index preceding its primary
+ *   action. If the reply cannot be parsed as JSON, one insert per new item is returned.
+ * @throws Rejections from the chat-completion request propagate (the request runs outside the try block).
+ */
+async function extractWorldImageWithLLM(
+  llmConfig: LLMConfig,
+  newItems: LLMExtractionItem[],
+  existingCandidates: MemorySearchResult[],
+): Promise<WorldImageAction[]> {
+  if (newItems.length === 0) return [];
+  type PrimaryAction = Exclude<WorldImageAction, { action: "delete" }>;
+  const newForPrompt = newItems.map((item, i) => ({
+    index: i,
+    text: item.text,
+    importance: item.importance,
+  }));
+  const existingForPrompt = existingCandidates.map((r) => ({
+    id: r.entry.id,
+    text: r.entry.text,
+  }));
+  const prompt = buildWorldImageExtractionPrompt(newForPrompt, existingForPrompt);
+  logLlmCall("worldImageExtraction", prompt.length);
+  const openai = new OpenAI({
+    apiKey: llmConfig.apiKey,
+    baseURL: llmConfig.baseUrl,
+  });
+  const completion = await openai.chat.completions.create({
+    model: llmConfig.model,
+    messages: [{ role: "user", content: prompt }],
+    temperature: 0,
+    max_tokens: 8192,
+  });
+  const raw = completion.choices[0]?.message?.content?.trim() ?? "";
+  // Only ids that were actually offered to the LLM may be updated or deleted.
+  const existingIdSet = new Set(existingCandidates.map((r) => r.entry.id));
+  try {
+    const parsed = JSON.parse(stripMarkdownJsonFence(raw)) as {
+      actions?: Array<{
+        index?: number;
+        action?: string;
+        text?: string;
+        importance?: unknown;
+        memoryId?: string;
+      }>;
+    };
+    const list = Array.isArray(parsed.actions) ? parsed.actions : [];
+    // Default: insert for each index
+    const primaryByIndex = newItems.map((item): PrimaryAction => ({
+      action: "insert",
+      text: item.text,
+      importance: item.importance,
+    }));
+    // Deletes are collected on the side so a later insert/update/skip cannot overwrite them.
+    const deletesByIndex: string[][] = newItems.map(() => []);
+    for (const a of list) {
+      // A malformed entry is ignored instead of aborting the whole batch.
+      if (a === null || typeof a !== "object") continue;
+      const idx = typeof a.index === "number" && Number.isInteger(a.index) ? a.index : -1;
+      if (idx < 0 || idx >= newItems.length) continue;
+      const source = newItems[idx]!;
+      const targetId =
+        typeof a.memoryId === "string" && existingIdSet.has(a.memoryId) ? a.memoryId : undefined;
+      if (a.action === "skip") {
+        primaryByIndex[idx] = { action: "skip" };
+      } else if (a.action === "delete" && targetId !== undefined) {
+        const pending = deletesByIndex[idx]!;
+        if (!pending.includes(targetId)) pending.push(targetId);
+      } else if (a.action === "update" && targetId !== undefined) {
+        const text = typeof a.text === "string" ? a.text.trim() : "";
+        // An update without usable text is ignored; the current action for the index stays.
+        if (text.length < 10) continue;
+        primaryByIndex[idx] = {
+          action: "update",
+          memoryId: targetId,
+          text,
+          importance: clampImportance(a.importance ?? source.importance),
+        };
+      } else if (a.action === "insert") {
+        const text = typeof a.text === "string" ? a.text.trim() : "";
+        // Too-short rewritten text falls back to the originally extracted text.
+        const finalText = text.length >= 10 ? text : source.text;
+        primaryByIndex[idx] = {
+          action: "insert",
+          text: finalText,
+          importance: clampImportance(a.importance ?? source.importance),
+        };
+      }
+    }
+    const result: WorldImageAction[] = [];
+    for (let i = 0; i < newItems.length; i++) {
+      const primary = primaryByIndex[i]!;
+      // An update already replaces its target, so a delete of the same id is redundant.
+      const replacedId = primary.action === "update" ? primary.memoryId : undefined;
+      const deletes = deletesByIndex[i]!.filter((id) => id !== replacedId);
+      for (const memoryId of deletes) {
+        result.push({ action: "delete", memoryId });
+      }
+      // A skip next to deletes carries no extra information; emit it only on its own.
+      if (primary.action !== "skip" || deletes.length === 0) {
+        result.push(primary);
+      }
+    }
+    return result;
+  } catch (err: unknown) {
+    console.warn(`[openclaw-memory-alibaba-local] worldImageExtraction JSON parse failed, fallback insert all: ${err}`);
+    return newItems.map((item) => ({
+      action: "insert" as const,
+      text: item.text,
+      importance: item.importance,
+    }));
+  }
+}
 function shouldCapture(text: string, options?: { maxChars?: number }): boolean {
   const maxChars = options?.maxChars ?? DEFAULT_CAPTURE_MAX_CHARS;
   if (text.length < 10 || text.length > maxChars) return false;
@@ -947,6 +1061,11 @@ type DeltaFullContextRow = {
 /**
  * agent_end: per-role cursors → delta rows by source → LanceDB for full_context_* (shared batchId, no embed / no dedup);
  * then Promise.all(user-memory pipeline on user deltas, self-improving on user+assistant deltas).
+ *
+ * When `cursorOnly` is true the function advances the per-role cursor and persists it
+ * but skips all memory extraction / storage.  This is used for non-user triggers
+ * (heartbeat, cron, memory, …) so the cursor stays in sync with the growing
+ * session messages list without accidentally capturing heartbeat content.
  */
 async function runAgentEndCapture(
   cfg: MemoryConfig,
@@ -957,6 +1076,7 @@ async function runAgentEndCapture(
   userId: string | null,
   messages: unknown[],
   lancedbDir: string,
+  cursorOnly = false,
 ): Promise<void> {
   if (messages.length === 0) {
     return;
@@ -973,6 +1093,25 @@ async function runAgentEndCapture(
   }
   const running: Record<string, number> = { ...saved };
+  // --- cursor-only fast path: count roles then persist without extraction ---
+  if (cursorOnly) {
+    for (const msg of messages) {
+      if (!msg || typeof msg !== "object") continue;
+      const m = msg as Record<string, unknown>;
+      const roleRaw = typeof m.role === "string" ? m.role : "unknown";
+      const roleKey = normalizeRoleForCursor(roleRaw);
+      running[roleKey] = (running[roleKey] ?? 0) + 1;
+    }
+    map[key] = { version: 2, roleCounts: { ...running }, lastMessagesLength: messages.length };
+    saveAgentEndCursorMap(lancedbDir, map);
+    console.log(
+      `[openclaw-memory-alibaba-local] agent_end cursor-only advance (non-user trigger) messages=${messages.length}`,
+    );
+    return;
+  }
+  // --- full capture path (trigger === "user") ---
   const fullRows: DeltaFullContextRow[] = [];
   const userRawTexts: string[] = [];
   const uaLines: string[] = [];
@@ -1133,50 +1272,103 @@ async function captureUserMemoryFromInboundTexts(
   // ---- Parallel: event-item pipeline & user-item pipeline ----
   const eventPipeline = async () => {
     if (eventItems.length === 0) return;
-    if (cfg.memory_duplication_conflict_process) {
-      type EmbeddedItem = { item: LLMExtractionItem; vectors: number[][]; primary: number[] };
-      const embedded: EmbeddedItem[] = [];
-      for (const item of eventItems) {
-        const truncated = truncateForCapture(item.text, cfg.captureMaxChars);
-        const { vectors } = await backend.encodeForStorage(truncated);
-        embedded.push({ item: { ...item, text: truncated }, vectors, primary: vectors[0] });
+    // --- No LLM fallback: simple dedup ---
+    if (!cfg.llm) {
+      for (const e of eventItems) {
+        const text = truncateForCapture(e.text, cfg.captureMaxChars);
+        if (await db.existsSemanticDuplicate(agentId, sessionKey, e.category, text)) {
+          continue;
+        }
+        await storeOneCaptureItem(agentId, { category: e.category, text, importance: e.importance }, cfg, db, backend, {
+          userId,
+          sessionId: sessionKey,
+        });
       }
-      const clusters = greedyCluster(embedded, (e) => e.primary, (e) => extractDatePrefix(e.item.text), 0.85, 3);
-      for (const cluster of clusters) {
-        const text = concatDedupeDate(cluster.map((e) => e.item.text));
-        const importance = Math.max(...cluster.map((e) => e.item.importance));
-        const { vectors } = await backend.encodeForStorage(text);
+      return;
+    }
+    // --- LLM path: recall + CRUD ---
+    // 1. Batch embed all event items
+    const embeddingResults: { item: LLMExtractionItem; vectors: number[][] }[] = [];
+    for (const item of eventItems) {
+      const truncated = truncateForCapture(item.text, cfg.captureMaxChars);
+      const { vectors } = await backend.encodeForStorage(truncated);
+      embeddingResults.push({ item: { ...item, text: truncated }, vectors });
+    }
+    // 2. Recall top-3 similar existing world_facts per item, merge & dedup (cap ~10)
+    const allVectors = embeddingResults.flatMap((r) => r.vectors);
+    const recallMinScore = Math.max(0.5, cfg.similarityThresholdUserMemory - 0.35);
+    const existingCandidates = allVectors.length > 0
+      ? await db.searchMerged(agentId, allVectors, 3, recallMinScore, [WORLD_FACT])
+      : [];
+    // 3. LLM CRUD decision
+    console.log(`[openclaw-memory-alibaba-local] worldImageExtraction input: ${eventItems.length} event items, ${existingCandidates.length} existing candidates`);
+    const worldActions = await extractWorldImageWithLLM(
+      cfg.llm,
+      embeddingResults.map((r) => r.item),
+      existingCandidates,
+    ).catch((err: unknown) => {
+      console.warn(`[openclaw-memory-alibaba-local] worldImageExtraction LLM failed, fallback insert all: ${err}`);
+      return embeddingResults.map((r): WorldImageAction => ({
+        action: "insert" as const,
+        text: r.item.text,
+        importance: r.item.importance,
+      }));
+    });
+    // 4. Execute actions
+    let insertCount = 0;
+    for (const action of worldActions) {
+      if (action.action === "skip") continue;
+      if (action.action === "delete") {
+        const hit = existingCandidates.find((c) => c.entry.id === action.memoryId);
+        if (hit) {
+          await deleteSimilarLogicalMemory(db, agentId, sessionKey, hit);
+        }
+        continue;
+      }
+      if (action.action === "update") {
+        const hit = existingCandidates.find((c) => c.entry.id === action.memoryId);
+        if (hit) {
+          await deleteSimilarLogicalMemory(db, agentId, sessionKey, hit);
+        }
+        const { vectors } = await backend.encodeForStorage(action.text);
         const rows = buildChunkRows(
-          { category: WORLD_FACT as MemoryCategory, text, importance },
+          { category: WORLD_FACT as MemoryCategory, text: action.text, importance: action.importance },
           vectors,
           { userId, sessionId: sessionKey },
         );
         await db.storeMany(agentId, rows);
+        insertCount++;
+      } else {
+        // insert
+        const { vectors } = await backend.encodeForStorage(action.text);
+        const rows = buildChunkRows(
+          { category: WORLD_FACT as MemoryCategory, text: action.text, importance: action.importance },
+          vectors,
+          { userId, sessionId: sessionKey },
+        );
+        await db.storeMany(agentId, rows);
+        insertCount++;
       }
-      console.log(`[openclaw-memory-alibaba-local] clustered ${eventItems.length} event items \u2192 ${clusters.length} entries (max 3 per cluster, threshold 0.85)`);
-      // World fact LRU GC: every 10 insertions
-      worldFactGcCounter++;
+    }
+    console.log(`[openclaw-memory-alibaba-local] worldImageExtraction done: ${worldActions.length} actions, ${insertCount} stored`);
+    // 5. World fact LRU GC: every 10 insertions
+    if (insertCount > 0) {
+      worldFactGcCounter += insertCount;
       if (worldFactGcCounter >= 10) {
         worldFactGcCounter = 0;
         db.gcWorldFact(agentId, 10_000, 30 * 24 * 60 * 60 * 1000, 25_000).catch((err) =>
           console.warn(`[openclaw-memory-alibaba-local] gcWorldFact failed: ${err}`),
         );
       }
-    } else {
-      // Simple dedup path for event items
-      for (const e of eventItems) {
-        const text = truncateForCapture(e.text, cfg.captureMaxChars);
-        if (await db.existsSemanticDuplicate(agentId, sessionKey, e.category, text)) {
-          continue;
-        }
-        await storeOneCaptureItem(agentId, { category: e.category, text, importance: e.importance }, cfg, db, backend, {
-          userId,
-          sessionId: sessionKey,
-        });
-      }
     }
   };
@@ -1942,7 +2134,6 @@ const memoryPlugin = {
     if (cfg.autoCapture) {
       api.on("agent_end", async (event, ctx) => {
-        if ((ctx as { trigger?: string }).trigger !== "user") return;
         if (!db || !backend) {
           return;
         }
@@ -1950,6 +2141,9 @@ const memoryPlugin = {
           return;
         }
+        const trigger = (ctx as { trigger?: string }).trigger;
+        const isUserTrigger = trigger === "user";
         try {
           const tCap0 = Date.now();
           const storageSessionKey = resolveStorageSessionKey(ctx);
@@ -1971,9 +2165,10 @@ const memoryPlugin = {
             userId,
             event.messages,
             resolvedDbPath,
+            !isUserTrigger,
           );
           console.log(
-            `[openclaw-memory-alibaba-local] agent_end capture done totalHookMs=${Date.now() - tCap0} messages=${event.messages.length}`,
+            `[openclaw-memory-alibaba-local] agent_end ${isUserTrigger ? "capture" : "cursor-only"} done totalHookMs=${Date.now() - tCap0} messages=${event.messages.length} trigger=${trigger ?? "unknown"}`,
           );
         } catch (err) {
           console.warn(`[openclaw-memory-alibaba-local] agent_end capture failed: ${String(err)}`);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "openclaw-memory-alibaba-local",
-  "version": "1.0.4",
+  "version": "1.0.6",
   "description": "OpenClaw memory plugin: local LanceDB + DashScope-compatible embeddings",
   "type": "module",
   "engines": {

package/prompts.ts CHANGED Viewed

@@ -244,3 +244,60 @@ export function buildUserImageExtractionPrompt(
     `\nToday is ${today}.\n\nNew extractions:\n${newSection}\n\nExisting memories:\n${existingSection}\n`
   );
 }
+// ---------------------------------------------------------------------------
+// 4. WORLD IMAGE EXTRACTION (deduplicate & reconcile world facts)
+// ---------------------------------------------------------------------------
+export const WORLD_IMAGE_EXTRACTION_INSTRUCTIONS = `You are a World-Fact Organizer that keeps a concise, non-redundant knowledge base of real-world events, facts, and context mentioned in conversations.
+# Inputs
+- **Batch** (indexed 0..N-1): newly extracted world facts / events.
+- **Store** (with id): existing world-fact memories in the database. May be empty.
+# What to Keep
+Only INSERT or UPDATE information that captures a concrete, verifiable fact or event:
+1. Events — what happened, when, where, who was involved
+2. Factual statements — statistics, dates, locations, outcomes
+3. Contextual knowledge — project status, external conditions, third-party decisions
+# What to SKIP
+- Information already fully covered by a Store item (same meaning, same or less detail)
+- Vague or speculative statements with no concrete fact
+- Purely conversational filler with no informational content
+# Reconciliation Principles
+1. **Prefer the richer version**: When a batch item and a Store item describe the same topic, keep whichever has the most information. If the batch adds new details, UPDATE.
+2. **Preserve temporal markers**: Keep [as of ...] or [date] prefixes — world facts are time-sensitive.
+3. **High cohesion**: Only merge entries about the exact same event or fact. Different events stay separate even if related.
+4. **Contradiction = replace**: If a batch item directly contradicts a Store item (e.g. different outcome), DELETE the old item and INSERT the new one.
+# Actions (one per batch index)
+- **INSERT**: New world fact not in Store.
+- **UPDATE** (memoryId): Store item covers the same event/fact; merge to produce the richer version.
+- **SKIP**: Already fully covered by Store, or not a concrete fact.
+- **DELETE** (memoryId): Batch item contradicts a Store item. Delete old; INSERT new.
+# Output
+Reply with ONLY a JSON object:
+{"actions":[
+  {"index":0,"action":"insert","text":"[2026-04-01] Project X launched v2.0","importance":0.5},
+  {"index":1,"action":"skip"},
+  {"index":2,"action":"update","memoryId":"uuid","text":"[2026-03-28] Company Y acquired Z for $2B, deal finalized","importance":0.5},
+  {"index":3,"action":"delete","memoryId":"uuid"}
+]}
+Every batch index must appear. A single index may produce BOTH delete + insert.
+`; // Fixed instruction text that buildWorldImageExtractionPrompt places ahead of the date and the JSON "New extractions" / "Existing memories" sections. NOTE(review): the heading "Actions (one per batch index)" and the closing "A single index may produce BOTH delete + insert" disagree — confirm which wording is intended.
+/**
+ * Assemble the complete world-image extraction prompt.
+ *
+ * The result is the fixed instruction text followed by today's date (the date part of
+ * `Date#toISOString`, i.e. UTC) and two JSON sections pretty-printed with 2-space indentation.
+ *
+ * @param newItems - Batch of newly extracted world facts, each carrying its batch index.
+ * @param existingMemories - Stored world-fact memories (id + text) offered for reconciliation.
+ * @returns The prompt string to send as the user message.
+ */
+export function buildWorldImageExtractionPrompt(
+  newItems: Array<{ index: number; text: string; importance: number }>,
+  existingMemories: Array<{ id: string; text: string }>,
+): string {
+  const toPrettyJson = (value: unknown): string => JSON.stringify(value, null, 2);
+  // Everything before the "T" of the ISO timestamp is the calendar date.
+  const [today] = new Date().toISOString().split("T");
+  const newSection = toPrettyJson(newItems);
+  const existingSection = toPrettyJson(existingMemories);
+  const dynamicTail = `\nToday is ${today}.\n\nNew extractions:\n${newSection}\n\nExisting memories:\n${existingSection}\n`;
+  return WORLD_IMAGE_EXTRACTION_INSTRUCTIONS + dynamicTail;
+}