memory-lancedb-pro 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/openclaw.plugin.json +1 -1
- package/package.json +2 -2
- package/scripts/jsonl_distill.py +20 -0
- package/src/adaptive-retrieval.ts +27 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 1.0.6
|
|
4
|
+
|
|
5
|
+
- Fix: auto-recall injection now correctly skips cron prompts wrapped as `[cron:...] run ...` (reduces token usage for cron jobs).
|
|
6
|
+
- Fix: JSONL distill extractor filters more transcript/system noise (BOOT.md, HEARTBEAT, CLAUDE_CODE_DONE, queued blocks) to avoid polluting distillation batches.
|
|
7
|
+
|
|
3
8
|
## 1.0.5
|
|
4
9
|
|
|
5
10
|
- Add: optional JSONL session distillation workflow (incremental cursor + batch format) via `scripts/jsonl_distill.py`.
|
package/openclaw.plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "memory-lancedb-pro",
|
|
3
3
|
"name": "Memory (LanceDB Pro)",
|
|
4
4
|
"description": "Enhanced LanceDB-backed long-term memory with hybrid retrieval, multi-scope isolation, and management CLI",
|
|
5
|
-
"version": "1.0.5",
|
|
5
|
+
"version": "1.0.6",
|
|
6
6
|
"kind": "memory",
|
|
7
7
|
"configSchema": {
|
|
8
8
|
"type": "object",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "memory-lancedb-pro",
|
|
3
|
-
"version": "1.0.5",
|
|
3
|
+
"version": "1.0.6",
|
|
4
4
|
"description": "OpenClaw enhanced LanceDB memory plugin with hybrid retrieval (Vector + BM25), cross-encoder rerank, multi-scope isolation, and management CLI",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
],
|
|
19
19
|
"repository": {
|
|
20
20
|
"type": "git",
|
|
21
|
-
"url": "https://github.com/win4r/memory-lancedb-pro"
|
|
21
|
+
"url": "git+https://github.com/win4r/memory-lancedb-pro.git"
|
|
22
22
|
},
|
|
23
23
|
"author": "win4r",
|
|
24
24
|
"license": "MIT",
|
package/scripts/jsonl_distill.py
CHANGED
|
@@ -104,6 +104,11 @@ def _clean_text(s: str) -> str:
|
|
|
104
104
|
if "<relevant-memories>" in s:
|
|
105
105
|
s = re.sub(r"<relevant-memories>[\s\S]*?</relevant-memories>", "", s)
|
|
106
106
|
|
|
107
|
+
# Strip OpenClaw transcript headers that add noise but not meaning.
|
|
108
|
+
# Keep the actual user content that follows.
|
|
109
|
+
s = re.sub(r"^Conversation info \(untrusted metadata\):\s*\n+", "", s, flags=re.IGNORECASE)
|
|
110
|
+
s = re.sub(r"^Replied message \(untrusted, for context\):\s*\n+", "", s, flags=re.IGNORECASE)
|
|
111
|
+
|
|
107
112
|
# Drop embedded JSON blocks (often metadata) to reduce token waste.
|
|
108
113
|
s = re.sub(r"```json[\s\S]*?```", "", s)
|
|
109
114
|
|
|
@@ -118,12 +123,27 @@ def _is_noise(s: str) -> bool:
|
|
|
118
123
|
for p in NOISE_PREFIXES:
|
|
119
124
|
if s.startswith(p):
|
|
120
125
|
return True
|
|
126
|
+
|
|
127
|
+
lower = s.lower()
|
|
128
|
+
|
|
129
|
+
# Drop transcript/system boilerplate that should never become memories.
|
|
130
|
+
if "[queued messages while agent was busy]" in lower:
|
|
131
|
+
return True
|
|
132
|
+
if "you are running a boot check" in lower or "boot.md — gateway startup health check" in lower:
|
|
133
|
+
return True
|
|
134
|
+
if "read heartbeat.md" in lower:
|
|
135
|
+
return True
|
|
136
|
+
if "[claude_code_done]" in lower or "claude_code_done" in lower:
|
|
137
|
+
return True
|
|
138
|
+
|
|
121
139
|
# Skip overly long blocks (logs / dumps). The distiller can still capture the essence later.
|
|
122
140
|
if len(s) > 2000:
|
|
123
141
|
return True
|
|
142
|
+
|
|
124
143
|
# Skip pure code fences (usually tool output).
|
|
125
144
|
if s.strip().startswith("```") and s.strip().endswith("```"):
|
|
126
145
|
return True
|
|
146
|
+
|
|
127
147
|
return False
|
|
128
148
|
|
|
129
149
|
|
|
package/src/adaptive-retrieval.ts
CHANGED
|
@@ -32,12 +32,38 @@ const FORCE_RETRIEVE_PATTERNS = [
|
|
|
32
32
|
/(你记得|之前|上次|以前|还记得|提到过|说过)/i,
|
|
33
33
|
];
|
|
34
34
|
|
|
35
|
+
/**
|
|
36
|
+
* Normalize the raw prompt before applying skip/force rules.
|
|
37
|
+
*
|
|
38
|
+
* OpenClaw may wrap cron prompts like:
|
|
39
|
+
* "[cron:<jobId> <jobName>] run ..."
|
|
40
|
+
*
|
|
41
|
+
* We strip such prefixes so command-style prompts are properly detected and we
|
|
42
|
+
* can skip auto-recall injection (saves tokens).
|
|
43
|
+
*/
|
|
44
|
+
function normalizeQuery(query: string): string {
|
|
45
|
+
let s = query.trim();
|
|
46
|
+
|
|
47
|
+
// Strip OpenClaw cron wrapper prefix.
|
|
48
|
+
s = s.replace(/^\[cron:[^\]]+\]\s*/i, "");
|
|
49
|
+
|
|
50
|
+
// Strip OpenClaw injected metadata header used in some transcripts.
|
|
51
|
+
if (/^Conversation info \(untrusted metadata\):/i.test(s)) {
|
|
52
|
+
s = s.replace(/^Conversation info \(untrusted metadata\):\s*/i, "");
|
|
53
|
+
// If there is a blank-line separator, keep only the part after it.
|
|
54
|
+
const parts = s.split(/\n\s*\n/, 2);
|
|
55
|
+
if (parts.length === 2) s = parts[1];
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
return s.trim();
|
|
59
|
+
}
|
|
60
|
+
|
|
35
61
|
/**
|
|
36
62
|
* Determine if a query should skip memory retrieval.
|
|
37
63
|
* Returns true if retrieval should be skipped.
|
|
38
64
|
*/
|
|
39
65
|
export function shouldSkipRetrieval(query: string): boolean {
|
|
40
|
-
const trimmed = query.trim();
|
|
66
|
+
const trimmed = normalizeQuery(query);
|
|
41
67
|
|
|
42
68
|
// Force retrieve if query has memory-related intent (checked FIRST,
|
|
43
69
|
// before length check, so short CJK queries like "你记得吗" aren't skipped)
|