npm - openclaw-memory-alibaba-local - Versions diffs - 0.1.0 - Mend

openclaw-memory-alibaba-local 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/openclaw.plugin.json ADDED Viewed

@@ -0,0 +1,228 @@
+{
+  "id": "openclaw-memory-alibaba-local",
+  "name": "openclaw-memory-alibaba-local",
+  "description": "Local LanceDB long-term memory; user_memory_*, self_improving_*; DashScope-friendly defaults; table openclaw_memories_alibaba_local.",
+  "configSchema": {
+    "type": "object",
+    "additionalProperties": false,
+    "properties": {
+      "dbPath": {
+        "type": "string",
+        "default": "",
+        "description": "LanceDB directory (default: ~/.openclaw/memory/lancedb, same as official memory-lancedb)."
+      },
+      "embedding": {
+        "type": "object",
+        "additionalProperties": false,
+        "description": "Omitted or {} → local llama-embedding. If apiKey/model/baseUrl/dimensions set without mode → remote (maxToken/dimensions default in plugin parse).",
+        "properties": {
+          "mode": { "type": "string", "enum": ["local", "remote"], "default": "local" },
+          "commandPrefix": {
+            "type": "string",
+            "description": "Shell command; stdin = text to embed. Default uses llama-embedding + -f /dev/stdin --embd-output-format json."
+          },
+          "dimensions": { "type": "number", "description": "768 default (local); 1024 default (remote) when omitted." },
+          "maxToken": { "type": "number", "description": "2048 default (remote) when omitted." },
+          "apiKey": { "type": "string" },
+          "model": { "type": "string" },
+          "baseUrl": { "type": "string" }
+        }
+      },
+      "memory_duplication_conflict_process": {
+        "type": "boolean",
+        "default": true,
+        "description": "When true (default), LLM decides insert vs update among similar memories; requires llm. Set false to disable."
+      },
+      "llm": {
+        "type": "object",
+        "additionalProperties": false,
+        "description": "When LLM is required, each field can be omitted and filled from host openclaw.json: bailian apiKey/baseUrl and agents.defaults.model.primary (bailian/model → model id without prefix). Plugin values override file defaults.",
+        "properties": {
+          "apiKey": { "type": "string", "default": "" },
+          "model": { "type": "string", "default": "qwen-plus" },
+          "baseUrl": { "type": "string", "default": "https://dashscope.aliyuncs.com/compatible-mode/v1" }
+        }
+      },
+      "similarityThresholdUserMemory": { "type": "number", "default": 0.65 },
+      "similarityThresholdSelfImproving": { "type": "number", "default": 0.62 },
+      "enableFullContextMemory": {
+        "type": "boolean",
+        "default": true,
+        "description": "When true (default), append full_context_* rows on agent_end (incremental cursor). Stored as plain text with a zero vector placeholder (no embedding); not used in vector recall."
+      },
+      "enableSelfImprovingMemory": {
+        "type": "boolean",
+        "default": true,
+        "description": "When true (default), allow self_improving_* write and recall. Set false to disable."
+      },
+      "memoryExtractionMethod": {
+        "type": "string",
+        "enum": ["regex", "llm"],
+        "default": "llm",
+        "description": "regex or llm; llm needs apiKey+model (plugin llm and/or openclaw.json bailian + agents.defaults.model)."
+      },
+      "autoRecall": { "type": "boolean", "default": true },
+      "autoCapture": { "type": "boolean", "default": true },
+      "captureMaxChars": { "type": "number", "default": 50000 },
+      "enableMemoryDecay": {
+        "type": "boolean",
+        "default": true,
+        "description": "When true (default), recall applies time decay (defaults: exponential strategy with a 30-day half-life; see memoryDecayStrategy and memoryDecayHalfLifeDays). Set false to disable."
+      },
+      "memoryDecayHalfLifeDays": {
+        "type": "number",
+        "default": 30,
+        "description": "Half-life in days for exponential/linear decay (1–3650)."
+      },
+      "memoryDecayStrategy": {
+        "type": "string",
+        "enum": ["exponential", "linear", "none"],
+        "default": "exponential"
+      },
+      "adminPanelMemoryTypeOptions": {
+        "type": "object",
+        "additionalProperties": false,
+        "description": "管理端「用户记忆 / 自进化记忆 / 全文记忆」Tab 的「记忆类型」筛选项；可写 category + 中文 labelZh。省略则使用内置类别与中文名。",
+        "properties": {
+          "user": {
+            "type": "array",
+            "items": {
+              "type": "object",
+              "additionalProperties": false,
+              "required": ["category", "labelZh"],
+              "properties": {
+                "category": { "type": "string" },
+                "labelZh": { "type": "string" }
+              }
+            }
+          },
+          "self": {
+            "type": "array",
+            "items": {
+              "type": "object",
+              "additionalProperties": false,
+              "required": ["category", "labelZh"],
+              "properties": {
+                "category": { "type": "string" },
+                "labelZh": { "type": "string" }
+              }
+            }
+          },
+          "full": {
+            "type": "array",
+            "items": {
+              "type": "object",
+              "additionalProperties": false,
+              "required": ["category", "labelZh"],
+              "properties": {
+                "category": { "type": "string" },
+                "labelZh": { "type": "string" }
+              }
+            }
+          }
+        }
+      }
+    }
+  },
+  "uiHints": {
+    "dbPath": {
+      "label": "LanceDB Path",
+      "placeholder": "~/.openclaw/memory/lancedb",
+      "help": "Same default as official memory-lancedb; table openclaw_memories_alibaba_local inside this directory."
+    },
+    "embedding.mode": {
+      "label": "Embedding mode",
+      "help": "local: llama-embedding (stdin). remote: OpenAI-compatible HTTP (apiKey, model and baseUrl required; dimensions and maxToken fall back to defaults when omitted; first embed may fail if invalid)."
+    },
+    "embedding.commandPrefix": {
+      "label": "Local embed command",
+      "placeholder": "llama-embedding -m ~/.openclaw/... -f /dev/stdin --embd-output-format json",
+      "help": "Must read prompt from stdin; JSON output recommended for parsing."
+    },
+    "embedding.apiKey": {
+      "label": "Embedding API Key (remote)",
+      "sensitive": true,
+      "placeholder": "••••••••",
+      "help": "OpenAI-compatible /embeddings (${VAR} supported)"
+    },
+    "embedding.model": {
+      "label": "Embedding model (remote)",
+      "placeholder": "text-embedding-v3"
+    },
+    "embedding.baseUrl": {
+      "label": "Embedding base URL (remote)",
+      "placeholder": "https://dashscope.aliyuncs.com/compatible-mode/v1"
+    },
+    "embedding.dimensions": {
+      "label": "Embedding dimensions",
+      "placeholder": "768 or 1024"
+    },
+    "embedding.maxToken": {
+      "label": "Max tokens per chunk (approx)",
+      "placeholder": "2048",
+      "help": "Paragraphs exceeding this are split further (char length / 4)."
+    },
+    "llm.apiKey": {
+      "label": "LLM API Key",
+      "sensitive": true,
+      "placeholder": "••••••••",
+      "help": "If empty, uses models.providers.bailian.apiKey from openclaw.json when LLM is required"
+    },
+    "llm.model": {
+      "label": "LLM Model",
+      "placeholder": "qwen-plus",
+      "help": "If empty, uses agents.defaults.model.primary; bailian/name is normalized to name for Chat Completions"
+    },
+    "llm.baseUrl": {
+      "label": "LLM Base URL",
+      "placeholder": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+      "help": "If empty, uses models.providers.bailian.baseUrl, else DashScope compatible default"
+    },
+    "memory_duplication_conflict_process": {
+      "label": "Conflict / Dedup (LLM)",
+      "help": "Default on: LLM decides insert vs update. Turn off to use vector-only dedup."
+    },
+    "similarityThresholdUserMemory": {
+      "label": "User Memory Similarity Threshold",
+      "placeholder": "0.65",
+      "help": "0–1; L2 score = 1/(1+distance), same as official memory-lancedb"
+    },
+    "similarityThresholdSelfImproving": {
+      "label": "Self-Improving Similarity Threshold",
+      "placeholder": "0.62"
+    },
+    "enableFullContextMemory": {
+      "label": "Enable Full Context Memory",
+      "help": "Default on: one row per new transcript message on agent_end (batchId + cursor). No embedding/dedup; zero-vector placeholder only. Not injected into recall."
+    },
+    "enableSelfImprovingMemory": {
+      "label": "Enable Self-Improving Memory",
+      "help": "Default on: learnings / errors / feature requests. Turn off to disable."
+    },
+    "memoryExtractionMethod": {
+      "label": "Extraction Method",
+      "help": "regex or llm (default llm)"
+    },
+    "autoRecall": {
+      "label": "Auto Recall",
+      "help": "Inject memories at conversation start"
+    },
+    "autoCapture": {
+      "label": "Auto Capture",
+      "help": "Store memories at conversation end"
+    },
+    "captureMaxChars": {
+      "label": "Capture Max Chars",
+      "placeholder": "50000",
+      "help": "100–100000"
+    },
+    "enableMemoryDecay": {
+      "label": "Enable Memory Decay",
+      "help": "Default on: older memories score lower on recall. Turn off for flat scores."
+    },
+    "adminPanelMemoryTypeOptions": {
+      "label": "管理端记忆类型筛选项",
+      "help": "可选。按 Tab 配置 { user|self|full: [{ category, labelZh }] }；category 须为该 Tab 合法类别，labelZh 为下拉展示中文。不写则用内置列表与中文名。"
+    }
+  }
+}

package/package.json ADDED Viewed

@@ -0,0 +1,51 @@
+{
+  "name": "openclaw-memory-alibaba-local",
+  "version": "0.1.0",
+  "description": "OpenClaw memory plugin: local LanceDB + DashScope-compatible embeddings",
+  "type": "module",
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/chengjue-2445/openclaw-memory-alibaba-mysql-install-skill.git",
+    "directory": "openclaw-memory-alibaba-local"
+  },
+  "keywords": [
+    "openclaw",
+    "openclaw-plugin",
+    "memory",
+    "lancedb",
+    "vector-search"
+  ],
+  "files": [
+    "index.ts",
+    "prompt-strip.ts",
+    "bm25-recall.ts",
+    "capture-state.ts",
+    "config.ts",
+    "db.ts",
+    "embed-chunks.ts",
+    "embedding-backend.ts",
+    "categories.ts",
+    "prompts.ts",
+    "web/memory-routes.ts",
+    "web/memory-ui.ts",
+    "openclaw.plugin.json",
+    "README.md"
+  ],
+  "dependencies": {
+    "@lancedb/lancedb": "^0.27.1",
+    "@sinclair/typebox": "0.34.48",
+    "openai": "^6.25.0"
+  },
+  "peerDependencies": {
+    "openclaw": "*"
+  },
+  "openclaw": {
+    "extensions": [
+      "./index.ts"
+    ]
+  },
+  "devDependencies": {
+    "typescript": "^6.0.2"
+  }
+}

package/prompt-strip.ts ADDED Viewed

@@ -0,0 +1,141 @@
+/**
+ * Strip OpenClaw channel / injection noise for logical memory extraction and recall query building.
+ */
+/** Matches OpenClaw `buildInboundMetadataBlocks` (control-ui / channels) output: a labeled line such as `Sender (...):` followed by a fenced code block (optionally tagged `json`), plus trailing whitespace. Built with flags g (all occurrences), i (case-insensitive) and m (`^` matches at every line start). */
+const OPENCLAW_UNTRUSTED_METADATA_BLOCK_RE = new RegExp(
+  "(?:^|[\\r\\n])(?:Conversation info|Sender|Thread starter|Replied message|Forwarded message context|Chat history since last reply)\\s*\\([^)]*\\):\\s*" +
+    "```" +
+    "(?:json)?\\s*[\\s\\S]*?" +
+    "```" +
+    "\\s*",
+  "gim",
+);
+const THREAD_HISTORY_RE = /^\[Thread history - for context\]/; // Text that opens with the thread-history marker.
+const THREAD_STARTER_RE = /^\[Thread starter - for context\]/; // Text that opens with the thread-starter marker.
+const MEDIA_ATTACHED_LINE_RE = /^\[media attached/; // Line that opens with "[media attached" (the closing bracket is not required).
+const MEDIA_REPLY_HINT_PREFIX = "To send an image back, prefer the message tool"; // Leading text of the OpenClaw mediaReplyHint line.
+const MIN_RECALL_QUERY_LEN = 5; // Shortest query length (string `.length`) accepted by extractUserQueryForRecall before it falls back.
+/**
+ * Report whether a paragraph consists solely of `System:` event lines.
+ * Lines are trimmed and blank lines are ignored; a paragraph without any
+ * non-blank line is NOT considered a system-events paragraph.
+ */
+function isSystemEventsOnlyParagraph(p: string): boolean {
+  let sawContent = false;
+  for (const rawLine of p.split("\n")) {
+    const line = rawLine.trim();
+    if (line.length === 0) {
+      // Blank lines neither qualify nor disqualify the paragraph.
+      continue;
+    }
+    if (!line.startsWith("System:")) {
+      return false;
+    }
+    sawContent = true;
+  }
+  return sawContent;
+}
+/**
+ * Drop the leading run of media-attachment lines and OpenClaw mediaReplyHint lines.
+ * Blank lines inside that leading run are skipped as well. OpenClaw joins these
+ * lines with `\n` (not always `\n\n`), so the scan is line-based, not paragraph-based.
+ *
+ * @param text Raw prompt text; CRLF line endings are normalized to LF first.
+ * @returns `rest`: the text from the first line that is neither blank, media, nor hint
+ *          (leading newlines removed, trailing whitespace trimmed);
+ *          `tags`: one "media_reply_hint" per hint line, then "media_attached_lines"
+ *          if at least one media line was seen.
+ */
+function stripLeadingMediaAndHint(text: string): { rest: string; tags: string[] } {
+  const allLines = text.replace(/\r\n/g, "\n").split("\n");
+  const tags: string[] = [];
+  let mediaSeen = false;
+  let cursor = 0;
+  for (; cursor < allLines.length; cursor += 1) {
+    const trimmed = (allLines[cursor] ?? "").trim();
+    if (trimmed === "") {
+      continue;
+    }
+    if (MEDIA_ATTACHED_LINE_RE.test(trimmed)) {
+      mediaSeen = true;
+      continue;
+    }
+    if (trimmed.startsWith(MEDIA_REPLY_HINT_PREFIX)) {
+      tags.push("media_reply_hint");
+      continue;
+    }
+    // First line of real content: everything from here on is kept.
+    break;
+  }
+  if (mediaSeen) {
+    tags.push("media_attached_lines");
+  }
+  const remainder = allLines.slice(cursor).join("\n").replace(/^\n+/, "").trimEnd();
+  return { rest: remainder, tags };
+}
+/**
+ * Remove prompt/channel noise ahead of user_memory / self_improving extraction only.
+ * Full-context rows intentionally keep the raw transcript (including XML + OpenClaw metadata).
+ *
+ * Steps, in order: drop `<relevant-memories>` and `<knowledge-context>` elements, drop
+ * labeled OpenClaw metadata blocks until none remain, drop the first line-leading
+ * bracketed weekday stamp (e.g. `[Mon ...] `), collapse runs of 3+ newlines to two,
+ * and trim the result.
+ *
+ * @param text Raw message or prompt text.
+ * @returns The cleaned text (possibly empty).
+ */
+export function stripForLogicalMemoryExtraction(text: string): string {
+  const withoutMemoriesXml = text.replace(
+    /<\s*relevant-memories\b[\s\S]*?<\s*\/\s*relevant-memories\s*>/gi,
+    "\n",
+  );
+  let current = withoutMemoriesXml.replace(
+    /<\s*knowledge-context\b[\s\S]*?<\s*\/\s*knowledge-context\s*>/gi,
+    "\n",
+  );
+  // Re-apply the metadata-block pattern until a pass changes nothing.
+  for (;;) {
+    const next = current.replace(OPENCLAW_UNTRUSTED_METADATA_BLOCK_RE, "\n");
+    if (next === current) {
+      break;
+    }
+    current = next;
+  }
+  // No `g` flag: only the first weekday stamp found at a line start is removed.
+  const withoutDayStamp = current.replace(/^\s*\[(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\b[^[\]]*\]\s+/im, "");
+  const collapsed = withoutDayStamp.replace(/\n{3,}/g, "\n\n");
+  return collapsed.replace(/^\n+|\n+$/g, "").trim();
+}
+export type ExtractUserQueryForRecallResult = { // Result returned by extractUserQueryForRecall.
+  /** Text to use for embedding + BM25 recall. */
+  query: string;
+  /**
+   * True when the segment-filtered query was shorter than the minimum recall length.
+   * `query` is then the stripped full prompt if that is long enough; otherwise it is the
+   * short segment-filtered text when non-empty, else the (short) stripped full prompt.
+   */
+  usedFallback: boolean;
+  /** Labels of the strip steps that fired (e.g. "media_attached_lines", "thread_history_block"), for logs. */
+  removedLabels: string[];
+};
+/**
+ * Derive a recall query closer to "what the user said" than raw `event.prompt`.
+ * OpenClaw `prefixedCommandBody` stacks media, thread notes, system events, then user body — see get-reply-run.ts.
+ *
+ * The prompt is normalized (CRLF to LF, trimmed), leading media/hint lines are removed,
+ * blank-line-separated segments that are thread history, thread starter, or System-only
+ * paragraphs are dropped, and the remainder goes through stripForLogicalMemoryExtraction.
+ * If that result is shorter than MIN_RECALL_QUERY_LEN, the stripped full prompt is used instead.
+ *
+ * @param rawPrompt Prompt text as received from the host.
+ * @returns The query, whether the fallback path was taken, and the labels of removed parts.
+ */
+export function extractUserQueryForRecall(rawPrompt: string): ExtractUserQueryForRecallResult {
+  // Normalize once; both the primary path and the fallback start from this text.
+  const normalized = rawPrompt.replace(/\r\n/g, "\n").trim();
+  const mediaPass = stripLeadingMediaAndHint(normalized);
+  const removedLabels: string[] = [...mediaPass.tags];
+  const kept: string[] = [];
+  for (const rawSegment of mediaPass.rest.trim().split(/\n\n+/)) {
+    const seg = rawSegment.trim();
+    if (!seg) {
+      continue;
+    }
+    if (THREAD_HISTORY_RE.test(seg)) {
+      removedLabels.push("thread_history_block");
+      continue;
+    }
+    if (THREAD_STARTER_RE.test(seg)) {
+      removedLabels.push("thread_starter_block");
+      continue;
+    }
+    if (isSystemEventsOnlyParagraph(seg)) {
+      removedLabels.push("system_events_block");
+      continue;
+    }
+    kept.push(seg);
+  }
+  const joined = stripForLogicalMemoryExtraction(kept.join("\n\n").trim()).trim();
+  if (joined.length >= MIN_RECALL_QUERY_LEN) {
+    return { query: joined, usedFallback: false, removedLabels };
+  }
+  // Fallback: only now pay for stripping the whole prompt, since the result is unused otherwise.
+  const strippedFull = stripForLogicalMemoryExtraction(normalized).trim();
+  if (strippedFull.length >= MIN_RECALL_QUERY_LEN) {
+    return { query: strippedFull, usedFallback: true, removedLabels };
+  }
+  // Both candidates are too short: prefer the segment-filtered text when it is non-empty.
+  return { query: joined.length > 0 ? joined : strippedFull, usedFallback: true, removedLabels };
+}

package/prompts.ts ADDED Viewed

@@ -0,0 +1,117 @@
+/**
+ * Prompt templates for LLM-based memory extraction (memoryExtractionMethod: "llm").
+ * Aligned with Mem0-style extraction instructions.
+ * @see https://github.com/mem0ai/mem0/blob/main/openclaw/index.ts
+ */
+/** Instruction block for extracting user memories from conversation messages: the three user_memory_* categories, writing guidelines, exclusions, and the mandatory 0–1 importance score. The JSON reply format is defined separately in USER_MEMORY_EXTRACTION_FORMAT. */
+export const USER_MEMORY_EXTRACTION_INSTRUCTIONS = `Your Task: Extract and maintain a structured, evolving profile of the user from their conversations with an AI assistant. Capture information that would help the assistant provide personalized, context-aware responses in future interactions.
+Information to Extract (map each to exactly one category):
+1. user_memory_fact — Identity, context, and verifiable facts:
+   - Name, age, location, timezone, language preferences
+   - Occupation, employer, job role, industry, education
+   - Tech stack, tools, development environment, skill level
+   - Names and roles of people they mention (colleagues, family, friends)
+   - Current projects (name, description, status)
+   - Significant life events, milestones, upcoming plans
+2. user_memory_preference — Preferences and opinions:
+   - Communication style (formal/casual, verbose/concise)
+   - Tool and technology preferences (languages, frameworks, editors, OS)
+   - Content preferences, learning style, likes and dislikes
+   - Strong opinions or values they've expressed
+   - Work patterns, routines, how they organize work
+3. user_memory_decision — Decisions and commitments:
+   - Important decisions made and their reasoning
+   - Short-term and long-term goals, deadlines, milestones
+   - Lessons learned, strategies that worked or failed
+   - Changed opinions or updated beliefs
+   - Commitments or promises (by user or assistant to the user)
+Guidelines:
+- Store memories as clear, self-contained statements. Each memory should make sense on its own.
+- Language of the "text" field: match the user's language. If user messages are mainly in Chinese, write every "text" in concise natural Chinese (第三人称，如「用户偏好…」「用户正在…」). If mainly English, use English third person ("User prefers…"). Do not translate Chinese source into English for extraction.
+- Use third person: "User prefers...", "User is working on...", not "I prefer..." (or Chinese equivalents).
+- Include temporal context when relevant: "As of [date], user is working on..." (or 中文日期表述).
+- When information updates, prefer updating the existing memory rather than creating duplicates.
+- Preserve specificity: "User uses Next.js 14 with App Router" is better than "User uses React".
+- Capture the WHY behind preferences when stated: "User prefers Vim because of keyboard-driven workflow".
+Exclude:
+- Passwords, API keys, tokens, or any authentication credentials
+- Exact financial amounts (unless the user explicitly asks to remember them)
+- Temporary or ephemeral information (one-time questions, debugging with no lasting insight)
+- Generic small talk with no informational content
+- Raw code snippets (capture the intent or decision, not the code itself)
+- Information the user explicitly asks not to remember
+Importance (required for each extraction):
+- You MUST assign an importance score between 0 and 1 to each memory.
+- 0 = trivial, easily forgotten (e.g. minor preference, one-off mention).
+- 0.5 = moderate (e.g. typical fact or preference the assistant should know).
+- 1 = critical (e.g. identity, strong preference, commitment, safety-related, explicit "remember this").
+- Use decimals as needed (e.g. 0.3, 0.7, 0.9). Do not omit this field.`;
+/** Suffix that defines the required JSON output format ({"extractions":[...]}, empty array when nothing to remember) and ends with the "User messages" header, so the user messages appended by the caller follow it directly. */
+export const USER_MEMORY_EXTRACTION_FORMAT = `
+Reply with ONLY a single JSON object, no other text or markdown. Use this exact structure:
+{"extractions":[{"category":"user_memory_fact"|"user_memory_preference"|"user_memory_decision","text":"one clear third-person statement (中文若用户主要为中文)","importance":0.0 to 1.0}]}
+Every extraction MUST include "importance" (number 0–1). If nothing to remember, return: {"extractions":[]}
+User messages (extract from these; write "text" in Chinese when these are in Chinese):
+`;
+/**
+ * Assemble the user-memory extraction prompt body: the instruction block immediately
+ * followed by the output-format suffix. The actual user messages are not included;
+ * the caller appends them after the returned text.
+ */
+export function buildUserMemoryExtractionPrompt(): string {
+  return `${USER_MEMORY_EXTRACTION_INSTRUCTIONS}${USER_MEMORY_EXTRACTION_FORMAT}`;
+}
+/** Instructions for extracting self-improving items (learnings, errors, feature requests) from assistant/user dialogue. Unlike the user-memory prompt, this single constant also carries the JSON reply format and ends with the "Conversation" header — presumably the caller appends the transcript right after it (confirm against the call site). */
+export const SELF_IMPROVING_EXTRACTION_INSTRUCTIONS = `Your Task: From a conversation between user and assistant, extract self-improving memory items the assistant (or system) should remember for future behavior.
+Categories (map each to exactly one):
+1. self_improving_learnings — Lessons, corrections, or best practices that emerged:
+   - "上线前必须重启服务使新代码生效"
+   - "User prefers to be addressed by first name"
+   - Technical or process learnings from the dialogue
+2. self_improving_errors — Mistakes, failures, or things to avoid:
+   - Errors the user or assistant encountered and how they were resolved
+   - "Do not assume X; always check Y first"
+3. self_improving_feature_requests — User or assistant requests for future behavior:
+   - "Remember to always confirm before deleting"
+   - Feature or workflow requests the user stated
+Guidelines:
+- Extract only clear, actionable items. One short sentence per item.
+- Prefer the language of the conversation (Chinese or English).
+- If nothing fits any category, return empty extractions.
+Exclude:
+- Passwords, API keys, tokens, or any authentication credentials
+- Exact financial amounts or sensitive personal data
+- One-off debugging logs or temporary error messages with no lasting lesson
+- Generic small talk or greetings with no actionable insight
+- Raw code snippets (capture the intent or rule, not the code itself)
+- Information the user or assistant explicitly asks not to remember
+- Injected template text (e.g. <relevant-memories>, <knowledge-context> labels) or metadata
+Importance (required for each extraction):
+- You MUST assign an importance score between 0 and 1 to each memory.
+- 0 = trivial, one-off tip with little impact on future behavior.
+- 0.5 = moderate (e.g. typical process lesson or preference the assistant should follow).
+- 1 = critical (e.g. safety-related rule, explicit "always/never" from user, recurring error pattern).
+- Use decimals as needed (e.g. 0.3, 0.7, 0.9). Do not omit this field.
+Reply with ONLY a single JSON object, no other text or markdown:
+{"extractions":[{"category":"self_improving_learnings"|"self_improving_errors"|"self_improving_feature_requests","text":"one short statement","importance":0.0 to 1.0}]}
+Every extraction MUST include "importance" (number 0–1). If nothing to extract, return: {"extractions":[]}
+Conversation (extract from this):
+`;