npm - memory-lancedb-pro - Versions diffs - 1.0.5 → 1.0.7 - Mend

memory-lancedb-pro 1.0.5 → 1.0.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6)

package/CHANGELOG.md +9 -0
package/index.ts +4 -4
package/openclaw.plugin.json +1 -1
package/package.json +2 -2
package/scripts/jsonl_distill.py +20 -0
package/src/adaptive-retrieval.ts +27 -1

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,14 @@
 # Changelog
+## 1.0.7
+- Fix: resolve `agentId` from hook context (`ctx?.agentId`) for `before_agent_start` and `agent_end`, restoring per-agent scope isolation when using multi-agent setups.
+## 1.0.6
+- Fix: auto-recall injection now correctly skips cron prompts wrapped as `[cron:...] run ...` (reduces token usage for cron jobs).
+- Fix: JSONL distill extractor filters more transcript/system noise (BOOT.md, HEARTBEAT, CLAUDE_CODE_DONE, queued blocks) to avoid polluting distillation batches.
 ## 1.0.5
 - Add: optional JSONL session distillation workflow (incremental cursor + batch format) via `scripts/jsonl_distill.py`.

package/index.ts CHANGED Viewed

@@ -365,14 +365,14 @@ const memoryLanceDBProPlugin = {
     // Auto-recall: inject relevant memories before agent starts
     if (config.autoRecall !== false) {
-      api.on("before_agent_start", async (event) => {
+      api.on("before_agent_start", async (event, ctx) => {
         if (!event.prompt || shouldSkipRetrieval(event.prompt)) {
           return;
         }
         try {
           // Determine agent ID and accessible scopes
-          const agentId = event.agentId || "main";
+          const agentId = ctx?.agentId || "main";
           const accessibleScopes = scopeManager.getAccessibleScopes(agentId);
           const results = await retriever.retrieve({
@@ -409,14 +409,14 @@ const memoryLanceDBProPlugin = {
     // Auto-capture: analyze and store important information after agent ends
     if (config.autoCapture !== false) {
-      api.on("agent_end", async (event) => {
+      api.on("agent_end", async (event, ctx) => {
         if (!event.success || !event.messages || event.messages.length === 0) {
           return;
         }
         try {
           // Determine agent ID and default scope
-          const agentId = event.agentId || "main";
+          const agentId = ctx?.agentId || "main";
           const defaultScope = scopeManager.getDefaultScope(agentId);
           // Extract text content from messages

package/openclaw.plugin.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "id": "memory-lancedb-pro",
   "name": "Memory (LanceDB Pro)",
   "description": "Enhanced LanceDB-backed long-term memory with hybrid retrieval, multi-scope isolation, and management CLI",
-  "version": "1.0.5",
+  "version": "1.0.7",
   "kind": "memory",
   "configSchema": {
     "type": "object",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "memory-lancedb-pro",
-  "version": "1.0.5",
+  "version": "1.0.7",
   "description": "OpenClaw enhanced LanceDB memory plugin with hybrid retrieval (Vector + BM25), cross-encoder rerank, multi-scope isolation, and management CLI",
   "type": "module",
   "main": "index.ts",
@@ -18,7 +18,7 @@
   ],
   "repository": {
     "type": "git",
-    "url": "https://github.com/win4r/memory-lancedb-pro"
+    "url": "git+https://github.com/win4r/memory-lancedb-pro.git"
   },
   "author": "win4r",
   "license": "MIT",

package/scripts/jsonl_distill.py CHANGED Viewed

@@ -104,6 +104,11 @@ def _clean_text(s: str) -> str:
     if "<relevant-memories>" in s:
         s = re.sub(r"<relevant-memories>[\s\S]*?</relevant-memories>", "", s)
+    # Strip OpenClaw transcript headers that add noise but not meaning.
+    # Keep the actual user content that follows.
+    s = re.sub(r"^Conversation info \(untrusted metadata\):\s*\n+", "", s, flags=re.IGNORECASE)
+    s = re.sub(r"^Replied message \(untrusted, for context\):\s*\n+", "", s, flags=re.IGNORECASE)
     # Drop embedded JSON blocks (often metadata) to reduce token waste.
     s = re.sub(r"```json[\s\S]*?```", "", s)
@@ -118,12 +123,27 @@ def _is_noise(s: str) -> bool:
     for p in NOISE_PREFIXES:
         if s.startswith(p):
             return True
+    lower = s.lower()
+    # Drop transcript/system boilerplate that should never become memories.
+    if "[queued messages while agent was busy]" in lower:
+        return True
+    if "you are running a boot check" in lower or "boot.md — gateway startup health check" in lower:
+        return True
+    if "read heartbeat.md" in lower:
+        return True
+    if "[claude_code_done]" in lower or "claude_code_done" in lower:
+        return True
     # Skip overly long blocks (logs / dumps). The distiller can still capture the essence later.
     if len(s) > 2000:
         return True
     # Skip pure code fences (usually tool output).
     if s.strip().startswith("```") and s.strip().endswith("```"):
         return True
     return False

package/src/adaptive-retrieval.ts CHANGED Viewed

@@ -32,12 +32,38 @@ const FORCE_RETRIEVE_PATTERNS = [
   /(你记得|之前|上次|以前|还记得|提到过|说过)/i,
 ];
+/**
+ * Normalize the raw prompt before applying skip/force rules.
+ *
+ * OpenClaw may wrap cron prompts like:
+ *   "[cron:<jobId> <jobName>] run ..."
+ *
+ * We strip such prefixes so command-style prompts are properly detected and we
+ * can skip auto-recall injection (saves tokens).
+ */
+function normalizeQuery(query: string): string {
+  let s = query.trim();
+  // Strip OpenClaw cron wrapper prefix.
+  s = s.replace(/^\[cron:[^\]]+\]\s*/i, "");
+  // Strip OpenClaw injected metadata header used in some transcripts.
+  if (/^Conversation info \(untrusted metadata\):/i.test(s)) {
+    s = s.replace(/^Conversation info \(untrusted metadata\):\s*/i, "");
+    // If there is a blank-line separator, keep only the part after it.
+    const parts = s.split(/\n\s*\n/, 2);
+    if (parts.length === 2) s = parts[1];
+  }
+  return s.trim();
+}
 /**
  * Determine if a query should skip memory retrieval.
  * Returns true if retrieval should be skipped.
  */
 export function shouldSkipRetrieval(query: string): boolean {
-  const trimmed = query.trim();
+  const trimmed = normalizeQuery(query);
   // Force retrieve if query has memory-related intent (checked FIRST,
   // before length check, so short CJK queries like "你记得吗" aren't skipped)