memory-lancedb-pro 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/index.ts +4 -4
- package/openclaw.plugin.json +1 -1
- package/package.json +2 -2
- package/scripts/jsonl_distill.py +20 -0
- package/src/adaptive-retrieval.ts +27 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 1.0.7
|
|
4
|
+
|
|
5
|
+
- Fix: resolve `agentId` from hook context (`ctx?.agentId`) for `before_agent_start` and `agent_end`, restoring per-agent scope isolation when using multi-agent setups.
|
|
6
|
+
|
|
7
|
+
## 1.0.6
|
|
8
|
+
|
|
9
|
+
- Fix: auto-recall injection now correctly skips cron prompts wrapped as `[cron:...] run ...` (reduces token usage for cron jobs).
|
|
10
|
+
- Fix: JSONL distill extractor filters more transcript/system noise (BOOT.md, HEARTBEAT, CLAUDE_CODE_DONE, queued blocks) to avoid polluting distillation batches.
|
|
11
|
+
|
|
3
12
|
## 1.0.5
|
|
4
13
|
|
|
5
14
|
- Add: optional JSONL session distillation workflow (incremental cursor + batch format) via `scripts/jsonl_distill.py`.
|
package/index.ts
CHANGED
|
@@ -365,14 +365,14 @@ const memoryLanceDBProPlugin = {
|
|
|
365
365
|
|
|
366
366
|
// Auto-recall: inject relevant memories before agent starts
|
|
367
367
|
if (config.autoRecall !== false) {
|
|
368
|
-
api.on("before_agent_start", async (event) => {
|
|
368
|
+
api.on("before_agent_start", async (event, ctx) => {
|
|
369
369
|
if (!event.prompt || shouldSkipRetrieval(event.prompt)) {
|
|
370
370
|
return;
|
|
371
371
|
}
|
|
372
372
|
|
|
373
373
|
try {
|
|
374
374
|
// Determine agent ID and accessible scopes
|
|
375
|
-
const agentId =
|
|
375
|
+
const agentId = ctx?.agentId || "main";
|
|
376
376
|
const accessibleScopes = scopeManager.getAccessibleScopes(agentId);
|
|
377
377
|
|
|
378
378
|
const results = await retriever.retrieve({
|
|
@@ -409,14 +409,14 @@ const memoryLanceDBProPlugin = {
|
|
|
409
409
|
|
|
410
410
|
// Auto-capture: analyze and store important information after agent ends
|
|
411
411
|
if (config.autoCapture !== false) {
|
|
412
|
-
api.on("agent_end", async (event) => {
|
|
412
|
+
api.on("agent_end", async (event, ctx) => {
|
|
413
413
|
if (!event.success || !event.messages || event.messages.length === 0) {
|
|
414
414
|
return;
|
|
415
415
|
}
|
|
416
416
|
|
|
417
417
|
try {
|
|
418
418
|
// Determine agent ID and default scope
|
|
419
|
-
const agentId =
|
|
419
|
+
const agentId = ctx?.agentId || "main";
|
|
420
420
|
const defaultScope = scopeManager.getDefaultScope(agentId);
|
|
421
421
|
|
|
422
422
|
// Extract text content from messages
|
package/openclaw.plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "memory-lancedb-pro",
|
|
3
3
|
"name": "Memory (LanceDB Pro)",
|
|
4
4
|
"description": "Enhanced LanceDB-backed long-term memory with hybrid retrieval, multi-scope isolation, and management CLI",
|
|
5
|
-
"version": "1.0.5",
|
|
5
|
+
"version": "1.0.7",
|
|
6
6
|
"kind": "memory",
|
|
7
7
|
"configSchema": {
|
|
8
8
|
"type": "object",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "memory-lancedb-pro",
|
|
3
|
-
"version": "1.0.5",
|
|
3
|
+
"version": "1.0.7",
|
|
4
4
|
"description": "OpenClaw enhanced LanceDB memory plugin with hybrid retrieval (Vector + BM25), cross-encoder rerank, multi-scope isolation, and management CLI",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
],
|
|
19
19
|
"repository": {
|
|
20
20
|
"type": "git",
|
|
21
|
-
"url": "https://github.com/win4r/memory-lancedb-pro"
|
|
21
|
+
"url": "git+https://github.com/win4r/memory-lancedb-pro.git"
|
|
22
22
|
},
|
|
23
23
|
"author": "win4r",
|
|
24
24
|
"license": "MIT",
|
package/scripts/jsonl_distill.py
CHANGED
|
@@ -104,6 +104,11 @@ def _clean_text(s: str) -> str:
|
|
|
104
104
|
if "<relevant-memories>" in s:
|
|
105
105
|
s = re.sub(r"<relevant-memories>[\s\S]*?</relevant-memories>", "", s)
|
|
106
106
|
|
|
107
|
+
# Strip OpenClaw transcript headers that add noise but not meaning.
|
|
108
|
+
# Keep the actual user content that follows.
|
|
109
|
+
s = re.sub(r"^Conversation info \(untrusted metadata\):\s*\n+", "", s, flags=re.IGNORECASE)
|
|
110
|
+
s = re.sub(r"^Replied message \(untrusted, for context\):\s*\n+", "", s, flags=re.IGNORECASE)
|
|
111
|
+
|
|
107
112
|
# Drop embedded JSON blocks (often metadata) to reduce token waste.
|
|
108
113
|
s = re.sub(r"```json[\s\S]*?```", "", s)
|
|
109
114
|
|
|
@@ -118,12 +123,27 @@ def _is_noise(s: str) -> bool:
|
|
|
118
123
|
for p in NOISE_PREFIXES:
|
|
119
124
|
if s.startswith(p):
|
|
120
125
|
return True
|
|
126
|
+
|
|
127
|
+
lower = s.lower()
|
|
128
|
+
|
|
129
|
+
# Drop transcript/system boilerplate that should never become memories.
|
|
130
|
+
if "[queued messages while agent was busy]" in lower:
|
|
131
|
+
return True
|
|
132
|
+
if "you are running a boot check" in lower or "boot.md — gateway startup health check" in lower:
|
|
133
|
+
return True
|
|
134
|
+
if "read heartbeat.md" in lower:
|
|
135
|
+
return True
|
|
136
|
+
if "[claude_code_done]" in lower or "claude_code_done" in lower:
|
|
137
|
+
return True
|
|
138
|
+
|
|
121
139
|
# Skip overly long blocks (logs / dumps). The distiller can still capture the essence later.
|
|
122
140
|
if len(s) > 2000:
|
|
123
141
|
return True
|
|
142
|
+
|
|
124
143
|
# Skip pure code fences (usually tool output).
|
|
125
144
|
if s.strip().startswith("```") and s.strip().endswith("```"):
|
|
126
145
|
return True
|
|
146
|
+
|
|
127
147
|
return False
|
|
128
148
|
|
|
129
149
|
|
package/src/adaptive-retrieval.ts
CHANGED
|
@@ -32,12 +32,38 @@ const FORCE_RETRIEVE_PATTERNS = [
|
|
|
32
32
|
/(你记得|之前|上次|以前|还记得|提到过|说过)/i,
|
|
33
33
|
];
|
|
34
34
|
|
|
35
|
+
/**
 * Normalize the raw prompt before applying skip/force rules.
 *
 * OpenClaw may wrap cron prompts like:
 *   "[cron:<jobId> <jobName>] run ..."
 *
 * Such prefixes are stripped so command-style prompts are properly detected
 * and auto-recall injection can be skipped (saves tokens).
 *
 * A leading "Conversation info (untrusted metadata):" header is removed as
 * well. When a blank-line separator follows the header block, everything up
 * to and including that first separator is discarded and ALL remaining text
 * is kept, including any later paragraphs.
 *
 * @param query - Raw prompt text.
 * @returns The trimmed prompt with the cron prefix and metadata header removed.
 */
function normalizeQuery(query: string): string {
  let s = query.trim();

  // Strip OpenClaw cron wrapper prefix.
  s = s.replace(/^\[cron:[^\]]+\]\s*/i, "");

  // Strip OpenClaw injected metadata header used in some transcripts.
  const metadataHeader = /^Conversation info \(untrusted metadata\):\s*/i;
  if (metadataHeader.test(s)) {
    s = s.replace(metadataHeader, "");

    // If there is a blank-line separator, keep only the part after it.
    // Slice at the first separator rather than using split(sep, 2): the
    // limit argument truncates the result array, which would silently drop
    // every paragraph after the first one that follows the metadata.
    const separator = /\n\s*\n/.exec(s);
    if (separator !== null) {
      s = s.slice(separator.index + separator[0].length);
    }
  }

  return s.trim();
}
|
|
60
|
+
|
|
35
61
|
/**
|
|
36
62
|
* Determine if a query should skip memory retrieval.
|
|
37
63
|
* Returns true if retrieval should be skipped.
|
|
38
64
|
*/
|
|
39
65
|
export function shouldSkipRetrieval(query: string): boolean {
|
|
40
|
-
const trimmed = query.trim();
|
|
66
|
+
const trimmed = normalizeQuery(query);
|
|
41
67
|
|
|
42
68
|
// Force retrieve if query has memory-related intent (checked FIRST,
|
|
43
69
|
// before length check, so short CJK queries like "你记得吗" aren't skipped)
|