memory-lancedb-pro 1.0.5 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.0.7
4
+
5
+ - Fix: resolve `agentId` from hook context (`ctx?.agentId`) for `before_agent_start` and `agent_end`, restoring per-agent scope isolation when using multi-agent setups.
6
+
7
+ ## 1.0.6
8
+
9
+ - Fix: auto-recall injection now correctly skips cron prompts wrapped as `[cron:...] run ...` (reduces token usage for cron jobs).
10
+ - Fix: JSONL distill extractor filters more transcript/system noise (BOOT.md, HEARTBEAT, CLAUDE_CODE_DONE, queued blocks) to avoid polluting distillation batches.
11
+
3
12
  ## 1.0.5
4
13
 
5
14
  - Add: optional JSONL session distillation workflow (incremental cursor + batch format) via `scripts/jsonl_distill.py`.
package/index.ts CHANGED
@@ -365,14 +365,14 @@ const memoryLanceDBProPlugin = {
365
365
 
366
366
  // Auto-recall: inject relevant memories before agent starts
367
367
  if (config.autoRecall !== false) {
368
- api.on("before_agent_start", async (event) => {
368
+ api.on("before_agent_start", async (event, ctx) => {
369
369
  if (!event.prompt || shouldSkipRetrieval(event.prompt)) {
370
370
  return;
371
371
  }
372
372
 
373
373
  try {
374
374
  // Determine agent ID and accessible scopes
375
- const agentId = event.agentId || "main";
375
+ const agentId = ctx?.agentId || "main";
376
376
  const accessibleScopes = scopeManager.getAccessibleScopes(agentId);
377
377
 
378
378
  const results = await retriever.retrieve({
@@ -409,14 +409,14 @@ const memoryLanceDBProPlugin = {
409
409
 
410
410
  // Auto-capture: analyze and store important information after agent ends
411
411
  if (config.autoCapture !== false) {
412
- api.on("agent_end", async (event) => {
412
+ api.on("agent_end", async (event, ctx) => {
413
413
  if (!event.success || !event.messages || event.messages.length === 0) {
414
414
  return;
415
415
  }
416
416
 
417
417
  try {
418
418
  // Determine agent ID and default scope
419
- const agentId = event.agentId || "main";
419
+ const agentId = ctx?.agentId || "main";
420
420
  const defaultScope = scopeManager.getDefaultScope(agentId);
421
421
 
422
422
  // Extract text content from messages
@@ -2,7 +2,7 @@
2
2
  "id": "memory-lancedb-pro",
3
3
  "name": "Memory (LanceDB Pro)",
4
4
  "description": "Enhanced LanceDB-backed long-term memory with hybrid retrieval, multi-scope isolation, and management CLI",
5
- "version": "1.0.5",
5
+ "version": "1.0.7",
6
6
  "kind": "memory",
7
7
  "configSchema": {
8
8
  "type": "object",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "memory-lancedb-pro",
3
- "version": "1.0.5",
3
+ "version": "1.0.7",
4
4
  "description": "OpenClaw enhanced LanceDB memory plugin with hybrid retrieval (Vector + BM25), cross-encoder rerank, multi-scope isolation, and management CLI",
5
5
  "type": "module",
6
6
  "main": "index.ts",
@@ -18,7 +18,7 @@
18
18
  ],
19
19
  "repository": {
20
20
  "type": "git",
21
- "url": "https://github.com/win4r/memory-lancedb-pro"
21
+ "url": "git+https://github.com/win4r/memory-lancedb-pro.git"
22
22
  },
23
23
  "author": "win4r",
24
24
  "license": "MIT",
package/scripts/jsonl_distill.py CHANGED
@@ -104,6 +104,11 @@ def _clean_text(s: str) -> str:
104
104
  if "<relevant-memories>" in s:
105
105
  s = re.sub(r"<relevant-memories>[\s\S]*?</relevant-memories>", "", s)
106
106
 
107
+ # Strip OpenClaw transcript headers that add noise but not meaning.
108
+ # Keep the actual user content that follows.
109
+ s = re.sub(r"^Conversation info \(untrusted metadata\):\s*\n+", "", s, flags=re.IGNORECASE)
110
+ s = re.sub(r"^Replied message \(untrusted, for context\):\s*\n+", "", s, flags=re.IGNORECASE)
111
+
107
112
  # Drop embedded JSON blocks (often metadata) to reduce token waste.
108
113
  s = re.sub(r"```json[\s\S]*?```", "", s)
109
114
 
@@ -118,12 +123,27 @@ def _is_noise(s: str) -> bool:
118
123
  for p in NOISE_PREFIXES:
119
124
  if s.startswith(p):
120
125
  return True
126
+
127
+ lower = s.lower()
128
+
129
+ # Drop transcript/system boilerplate that should never become memories.
130
+ if "[queued messages while agent was busy]" in lower:
131
+ return True
132
+ if "you are running a boot check" in lower or "boot.md — gateway startup health check" in lower:
133
+ return True
134
+ if "read heartbeat.md" in lower:
135
+ return True
136
+ if "[claude_code_done]" in lower or "claude_code_done" in lower:
137
+ return True
138
+
121
139
  # Skip overly long blocks (logs / dumps). The distiller can still capture the essence later.
122
140
  if len(s) > 2000:
123
141
  return True
142
+
124
143
  # Skip pure code fences (usually tool output).
125
144
  if s.strip().startswith("```") and s.strip().endswith("```"):
126
145
  return True
146
+
127
147
  return False
128
148
 
129
149
 
@@ -32,12 +32,38 @@ const FORCE_RETRIEVE_PATTERNS = [
32
32
  /(你记得|之前|上次|以前|还记得|提到过|说过)/i,
33
33
  ];
34
34
 
35
+ /**
36
+ * Normalize the raw prompt before applying skip/force rules.
37
+ *
38
+ * OpenClaw may wrap cron prompts like:
39
+ * "[cron:<jobId> <jobName>] run ..."
40
+ *
41
+ * We strip such prefixes so command-style prompts are properly detected and we
42
+ * can skip auto-recall injection (saves tokens).
43
+ */
44
+ function normalizeQuery(query: string): string {
45
+ let s = query.trim();
46
+
47
+ // Strip OpenClaw cron wrapper prefix.
48
+ s = s.replace(/^\[cron:[^\]]+\]\s*/i, "");
49
+
50
+ // Strip OpenClaw injected metadata header used in some transcripts.
51
+ if (/^Conversation info \(untrusted metadata\):/i.test(s)) {
52
+ s = s.replace(/^Conversation info \(untrusted metadata\):\s*/i, "");
53
+ // If there is a blank-line separator, keep only the part after it.
54
+ const parts = s.split(/\n\s*\n/, 2);
55
+ if (parts.length === 2) s = parts[1];
56
+ }
57
+
58
+ return s.trim();
59
+ }
60
+
35
61
  /**
36
62
  * Determine if a query should skip memory retrieval.
37
63
  * Returns true if retrieval should be skipped.
38
64
  */
39
65
  export function shouldSkipRetrieval(query: string): boolean {
40
- const trimmed = query.trim();
66
+ const trimmed = normalizeQuery(query);
41
67
 
42
68
  // Force retrieve if query has memory-related intent (checked FIRST,
43
69
  // before length check, so short CJK queries like "你记得吗" aren't skipped)