memory-lancedb-pro 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.0.6
4
+
5
+ - Fix: auto-recall injection now correctly skips cron prompts wrapped as `[cron:...] run ...` (reduces token usage for cron jobs).
6
+ - Fix: JSONL distill extractor filters more transcript/system noise (BOOT.md, HEARTBEAT, CLAUDE_CODE_DONE, queued blocks) to avoid polluting distillation batches.
7
+
3
8
  ## 1.0.5
4
9
 
5
10
  - Add: optional JSONL session distillation workflow (incremental cursor + batch format) via `scripts/jsonl_distill.py`.
@@ -2,7 +2,7 @@
2
2
  "id": "memory-lancedb-pro",
3
3
  "name": "Memory (LanceDB Pro)",
4
4
  "description": "Enhanced LanceDB-backed long-term memory with hybrid retrieval, multi-scope isolation, and management CLI",
5
- "version": "1.0.5",
5
+ "version": "1.0.6",
6
6
  "kind": "memory",
7
7
  "configSchema": {
8
8
  "type": "object",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "memory-lancedb-pro",
3
- "version": "1.0.5",
3
+ "version": "1.0.6",
4
4
  "description": "OpenClaw enhanced LanceDB memory plugin with hybrid retrieval (Vector + BM25), cross-encoder rerank, multi-scope isolation, and management CLI",
5
5
  "type": "module",
6
6
  "main": "index.ts",
@@ -18,7 +18,7 @@
18
18
  ],
19
19
  "repository": {
20
20
  "type": "git",
21
- "url": "https://github.com/win4r/memory-lancedb-pro"
21
+ "url": "git+https://github.com/win4r/memory-lancedb-pro.git"
22
22
  },
23
23
  "author": "win4r",
24
24
  "license": "MIT",
@@ -104,6 +104,11 @@ def _clean_text(s: str) -> str:
104
104
  if "<relevant-memories>" in s:
105
105
  s = re.sub(r"<relevant-memories>[\s\S]*?</relevant-memories>", "", s)
106
106
 
107
+ # Strip OpenClaw transcript headers that add noise but not meaning.
108
+ # Keep the actual user content that follows.
109
+ s = re.sub(r"^Conversation info \(untrusted metadata\):\s*\n+", "", s, flags=re.IGNORECASE)
110
+ s = re.sub(r"^Replied message \(untrusted, for context\):\s*\n+", "", s, flags=re.IGNORECASE)
111
+
107
112
  # Drop embedded JSON blocks (often metadata) to reduce token waste.
108
113
  s = re.sub(r"```json[\s\S]*?```", "", s)
109
114
 
@@ -118,12 +123,27 @@ def _is_noise(s: str) -> bool:
118
123
  for p in NOISE_PREFIXES:
119
124
  if s.startswith(p):
120
125
  return True
126
+
127
+ lower = s.lower()
128
+
129
+ # Drop transcript/system boilerplate that should never become memories.
130
+ if "[queued messages while agent was busy]" in lower:
131
+ return True
132
+ if "you are running a boot check" in lower or "boot.md — gateway startup health check" in lower:
133
+ return True
134
+ if "read heartbeat.md" in lower:
135
+ return True
136
+ if "[claude_code_done]" in lower or "claude_code_done" in lower:
137
+ return True
138
+
121
139
  # Skip overly long blocks (logs / dumps). The distiller can still capture the essence later.
122
140
  if len(s) > 2000:
123
141
  return True
142
+
124
143
  # Skip pure code fences (usually tool output).
125
144
  if s.strip().startswith("```") and s.strip().endswith("```"):
126
145
  return True
146
+
127
147
  return False
128
148
 
129
149
 
@@ -32,12 +32,38 @@ const FORCE_RETRIEVE_PATTERNS = [
32
32
  /(你记得|之前|上次|以前|还记得|提到过|说过)/i,
33
33
  ];
34
34
 
35
/**
 * Normalize the raw prompt before applying skip/force rules.
 *
 * Two wrappers are removed so the rules see the actual user text:
 *
 * 1. The OpenClaw cron prefix, e.g. `"[cron:<jobId> <jobName>] run ..."`.
 *    Stripping it lets command-style prompts be detected so auto-recall
 *    injection can be skipped (saves tokens).
 * 2. The injected `"Conversation info (untrusted metadata):"` header. When a
 *    blank line follows the metadata, everything up to and including that
 *    first blank line is discarded and ALL remaining text is kept, including
 *    any later paragraphs of the user's message.
 *
 * @param query - Raw prompt text as received.
 * @returns The prompt with known wrappers removed and surrounding whitespace trimmed.
 */
function normalizeQuery(query: string): string {
  let s = query.trim();

  // Strip OpenClaw cron wrapper prefix.
  s = s.replace(/^\[cron:[^\]]+\]\s*/i, "");

  // Strip OpenClaw injected metadata header used in some transcripts.
  const metadataHeader = /^Conversation info \(untrusted metadata\):\s*/i;
  if (metadataHeader.test(s)) {
    s = s.replace(metadataHeader, "");

    // Keep everything after the FIRST blank-line separator. `split(re, 2)` is
    // deliberately avoided: in JavaScript the limit truncates the result
    // array, so any paragraph after the second one would be dropped.
    const separatorAt = s.search(/\n\s*\n/);
    if (separatorAt !== -1) {
      // The separator itself is pure whitespace; the final trim() removes it.
      s = s.slice(separatorAt);
    }
  }

  return s.trim();
}
60
+
35
61
  /**
36
62
  * Determine if a query should skip memory retrieval.
37
63
  * Returns true if retrieval should be skipped.
38
64
  */
39
65
  export function shouldSkipRetrieval(query: string): boolean {
40
- const trimmed = query.trim();
66
+ const trimmed = normalizeQuery(query);
41
67
 
42
68
  // Force retrieve if query has memory-related intent (checked FIRST,
43
69
  // before length check, so short CJK queries like "你记得吗" aren't skipped)