memory-lancedb-pro 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,385 @@
1
+ {
2
+ "id": "memory-lancedb-pro",
3
+ "name": "Memory (LanceDB Pro)",
4
+ "description": "Enhanced LanceDB-backed long-term memory with hybrid retrieval, multi-scope isolation, and management CLI",
5
+ "version": "2026.2.16",
6
+ "kind": "memory",
7
+ "configSchema": {
8
+ "type": "object",
9
+ "additionalProperties": false,
10
+ "properties": {
11
+ "embedding": {
12
+ "type": "object",
13
+ "additionalProperties": false,
14
+ "properties": {
15
+ "provider": {
16
+ "type": "string",
17
+ "const": "openai-compatible"
18
+ },
19
+ "apiKey": {
20
+ "type": "string"
21
+ },
22
+ "model": {
23
+ "type": "string"
24
+ },
25
+ "baseURL": {
26
+ "type": "string"
27
+ },
28
+ "dimensions": {
29
+ "type": "integer",
30
+ "minimum": 1
31
+ },
32
+ "taskQuery": {
33
+ "type": "string",
34
+ "description": "Embedding task for queries (provider-specific, e.g. Jina: retrieval.query)"
35
+ },
36
+ "taskPassage": {
37
+ "type": "string",
38
+ "description": "Embedding task for passages/documents (provider-specific, e.g. Jina: retrieval.passage)"
39
+ },
40
+ "normalized": {
41
+ "type": "boolean",
42
+ "description": "Request normalized embeddings when supported by the provider (e.g. Jina v5)"
43
+ }
44
+ },
45
+ "required": [
46
+ "apiKey"
47
+ ]
48
+ },
49
+ "dbPath": {
50
+ "type": "string"
51
+ },
52
+ "enableManagementTools": {
53
+ "type": "boolean",
54
+ "default": false,
55
+ "description": "Enable memory_list and memory_stats management tools"
56
+ },
57
+ "autoCapture": {
58
+ "type": "boolean"
59
+ },
60
+ "autoRecall": {
61
+ "type": "boolean"
62
+ },
63
+ "captureAssistant": {
64
+ "type": "boolean"
65
+ },
66
+ "retrieval": {
67
+ "type": "object",
68
+ "additionalProperties": false,
69
+ "properties": {
70
+ "mode": {
71
+ "type": "string",
72
+ "enum": [
73
+ "hybrid",
74
+ "vector"
75
+ ],
76
+ "default": "hybrid"
77
+ },
78
+ "vectorWeight": {
79
+ "type": "number",
80
+ "minimum": 0,
81
+ "maximum": 1,
82
+ "default": 0.7
83
+ },
84
+ "bm25Weight": {
85
+ "type": "number",
86
+ "minimum": 0,
87
+ "maximum": 1,
88
+ "default": 0.3
89
+ },
90
+ "minScore": {
91
+ "type": "number",
92
+ "minimum": 0,
93
+ "maximum": 1,
94
+ "default": 0.3
95
+ },
96
+ "rerank": {
97
+ "type": "string",
98
+ "enum": [
99
+ "cross-encoder",
100
+ "lightweight",
101
+ "none"
102
+ ],
103
+ "default": "cross-encoder"
104
+ },
105
+ "rerankApiKey": {
106
+ "type": "string",
107
+ "description": "API key for reranker service (enables cross-encoder reranking)"
108
+ },
109
+ "rerankModel": {
110
+ "type": "string",
111
+ "default": "jina-reranker-v2-base-multilingual",
112
+ "description": "Reranker model name"
113
+ },
114
+ "rerankEndpoint": {
115
+ "type": "string",
116
+ "default": "https://api.jina.ai/v1/rerank",
117
+ "description": "Reranker API endpoint URL. Compatible with Jina, SiliconFlow, Pinecone, or any service with a similar interface."
118
+ },
119
+ "rerankProvider": {
120
+ "type": "string",
121
+ "enum": ["jina", "siliconflow", "pinecone"],
122
+ "default": "jina",
123
+ "description": "Reranker provider format. Determines request/response shape and auth header."
124
+ },
125
+ "candidatePoolSize": {
126
+ "type": "integer",
127
+ "minimum": 10,
128
+ "maximum": 100,
129
+ "default": 20
130
+ },
131
+ "recencyHalfLifeDays": {
132
+ "type": "number",
133
+ "minimum": 0,
134
+ "maximum": 365,
135
+ "default": 14,
136
+ "description": "Half-life in days for recency boost. Newer memories get higher scores. Set 0 to disable."
137
+ },
138
+ "recencyWeight": {
139
+ "type": "number",
140
+ "minimum": 0,
141
+ "maximum": 0.5,
142
+ "default": 0.1,
143
+ "description": "Maximum recency boost factor added to score"
144
+ },
145
+ "filterNoise": {
146
+ "type": "boolean",
147
+ "default": true,
148
+ "description": "Filter out noise memories (agent denials, meta-questions, boilerplate)"
149
+ },
150
+ "lengthNormAnchor": {
151
+ "type": "integer",
152
+ "minimum": 0,
153
+ "maximum": 5000,
154
+ "default": 500,
155
+ "description": "Length normalization anchor in chars. Entries longer than this get score penalized. Set 0 to disable."
156
+ },
157
+ "hardMinScore": {
158
+ "type": "number",
159
+ "minimum": 0,
160
+ "maximum": 1,
161
+ "default": 0.35,
162
+ "description": "Hard cutoff after all scoring stages. Results below this score are discarded."
163
+ },
164
+ "timeDecayHalfLifeDays": {
165
+ "type": "number",
166
+ "minimum": 0,
167
+ "maximum": 365,
168
+ "default": 60,
169
+ "description": "Time decay half-life in days. Old entries lose score gradually. Floor at 0.5x. Set 0 to disable."
170
+ }
171
+ }
172
+ },
173
+ "sessionMemory": {
174
+ "type": "object",
175
+ "additionalProperties": false,
176
+ "properties": {
177
+ "enabled": {
178
+ "type": "boolean",
179
+ "default": true,
180
+ "description": "Store session summaries to LanceDB on /new command (replaces built-in session-memory hook)"
181
+ },
182
+ "messageCount": {
183
+ "type": "integer",
184
+ "minimum": 1,
185
+ "maximum": 100,
186
+ "default": 15,
187
+ "description": "Number of recent messages to include in session summary"
188
+ }
189
+ }
190
+ },
191
+ "scopes": {
192
+ "type": "object",
193
+ "additionalProperties": false,
194
+ "properties": {
195
+ "default": {
196
+ "type": "string",
197
+ "default": "global"
198
+ },
199
+ "definitions": {
200
+ "type": "object",
201
+ "additionalProperties": {
202
+ "type": "object",
203
+ "properties": {
204
+ "description": {
205
+ "type": "string"
206
+ }
207
+ }
208
+ }
209
+ },
210
+ "agentAccess": {
211
+ "type": "object",
212
+ "additionalProperties": {
213
+ "type": "array",
214
+ "items": {
215
+ "type": "string"
216
+ }
217
+ }
218
+ }
219
+ }
220
+ }
221
+ },
222
+ "required": [
223
+ "embedding"
224
+ ]
225
+ },
226
+ "uiHints": {
227
+ "embedding.apiKey": {
228
+ "label": "API Key",
229
+ "sensitive": true,
230
+ "placeholder": "sk-proj-... or ${GEMINI_API_KEY} or 'ollama'",
231
+ "help": "API key for the embedding provider (or use ${OPENAI_API_KEY}; use a dummy value for keyless local endpoints)"
232
+ },
233
+ "embedding.model": {
234
+ "label": "Embedding Model",
235
+ "placeholder": "text-embedding-3-small",
236
+ "help": "Embedding model name (e.g. text-embedding-3-small, gemini-embedding-001, nomic-embed-text)"
237
+ },
238
+ "embedding.baseURL": {
239
+ "label": "Base URL",
240
+ "placeholder": "https://api.openai.com/v1",
241
+ "help": "Custom base URL for OpenAI-compatible embedding endpoints (e.g. https://generativelanguage.googleapis.com/v1beta/openai/ for Gemini, http://localhost:11434/v1 for Ollama)",
242
+ "advanced": true
243
+ },
244
+ "embedding.dimensions": {
245
+ "label": "Vector Dimensions",
246
+ "placeholder": "auto-detected from model",
247
+ "help": "Override vector dimensions for custom models not in the built-in lookup table",
248
+ "advanced": true
249
+ },
250
+ "dbPath": {
251
+ "label": "Database Path",
252
+ "placeholder": "~/.openclaw/memory/lancedb-pro",
253
+ "help": "Directory path for the LanceDB database files",
254
+ "advanced": true
255
+ },
256
+ "autoCapture": {
257
+ "label": "Auto-Capture",
258
+ "help": "Automatically capture important information from conversations"
259
+ },
260
+ "autoRecall": {
261
+ "label": "Auto-Recall",
262
+ "help": "Automatically inject relevant memories into context"
263
+ },
264
+ "captureAssistant": {
265
+ "label": "Capture Assistant Messages",
266
+ "help": "Also auto-capture assistant messages (default false to reduce memory pollution)",
267
+ "advanced": true
268
+ },
269
+ "retrieval.mode": {
270
+ "label": "Retrieval Mode",
271
+ "help": "Use hybrid search (vector + BM25) or vector-only for backward compatibility",
272
+ "advanced": true
273
+ },
274
+ "retrieval.vectorWeight": {
275
+ "label": "Vector Search Weight",
276
+ "help": "Weight for vector similarity in hybrid search (0-1)",
277
+ "advanced": true
278
+ },
279
+ "retrieval.bm25Weight": {
280
+ "label": "BM25 Search Weight",
281
+ "help": "Weight for BM25 keyword search in hybrid search (0-1)",
282
+ "advanced": true
283
+ },
284
+ "retrieval.minScore": {
285
+ "label": "Minimum Score Threshold",
286
+ "help": "Drop results below this relevance score (0-1)",
287
+ "advanced": true
288
+ },
289
+ "retrieval.rerank": {
290
+ "label": "Reranking Mode",
291
+ "help": "Re-score fused results for better quality (cross-encoder uses configured reranker API)",
292
+ "advanced": true
293
+ },
294
+ "retrieval.rerankApiKey": {
295
+ "label": "Reranker API Key",
296
+ "sensitive": true,
297
+ "placeholder": "jina_... / sk-... / pcsk_...",
298
+ "help": "Reranker API key for cross-encoder reranking",
299
+ "advanced": true
300
+ },
301
+ "retrieval.rerankModel": {
302
+ "label": "Reranker Model",
303
+ "placeholder": "jina-reranker-v2-base-multilingual",
304
+ "help": "Reranker model name (e.g. jina-reranker-v2-base-multilingual, BAAI/bge-reranker-v2-m3)",
305
+ "advanced": true
306
+ },
307
+ "retrieval.rerankEndpoint": {
308
+ "label": "Reranker Endpoint",
309
+ "placeholder": "https://api.jina.ai/v1/rerank",
310
+ "help": "Custom reranker API endpoint URL",
311
+ "advanced": true
312
+ },
313
+ "retrieval.rerankProvider": {
314
+ "label": "Reranker Provider",
315
+ "help": "Provider format: jina (default), siliconflow, or pinecone",
316
+ "advanced": true
317
+ },
318
+ "retrieval.candidatePoolSize": {
319
+ "label": "Candidate Pool Size",
320
+ "help": "Number of candidates to fetch before fusion and reranking",
321
+ "advanced": true
322
+ },
323
+ "sessionMemory.enabled": {
324
+ "label": "Session Memory",
325
+ "help": "Store session summaries to LanceDB when /new is triggered (replaces built-in session-memory hook)"
326
+ },
327
+ "sessionMemory.messageCount": {
328
+ "label": "Session Message Count",
329
+ "help": "Number of recent messages to include in session summaries",
330
+ "advanced": true
331
+ },
332
+ "scopes.default": {
333
+ "label": "Default Scope",
334
+ "help": "Default memory scope for new memories",
335
+ "advanced": true
336
+ },
337
+ "scopes.definitions": {
338
+ "label": "Scope Definitions",
339
+ "help": "Define custom memory scopes with descriptions",
340
+ "advanced": true
341
+ },
342
+ "scopes.agentAccess": {
343
+ "label": "Agent Access Control",
344
+ "help": "Define which scopes each agent can access",
345
+ "advanced": true
346
+ },
347
+ "enableManagementTools": {
348
+ "label": "Management Tools",
349
+ "help": "Enable memory_list and memory_stats tools for debugging and auditing",
350
+ "advanced": true
351
+ },
352
+ "retrieval.lengthNormAnchor": {
353
+ "label": "Length Normalization Anchor",
354
+ "help": "Entries longer than this (chars) get score penalized to prevent long entries dominating. 0 = disabled.",
355
+ "advanced": true
356
+ },
357
+ "retrieval.hardMinScore": {
358
+ "label": "Hard Minimum Score",
359
+ "help": "Discard results below this score after all scoring stages. Higher = fewer but more relevant results.",
360
+ "advanced": true
361
+ },
362
+ "retrieval.timeDecayHalfLifeDays": {
363
+ "label": "Time Decay Half-Life",
364
+ "help": "Old entries lose score over this many days. Floor at 0.5x. 0 = disabled.",
365
+ "advanced": true
366
+ },
367
+ "embedding.taskQuery": {
368
+ "label": "Query Task",
369
+ "placeholder": "retrieval.query",
370
+ "help": "Optional task selector for query embeddings (Jina: retrieval.query). If unset, no task field is sent.",
371
+ "advanced": true
372
+ },
373
+ "embedding.taskPassage": {
374
+ "label": "Passage Task",
375
+ "placeholder": "retrieval.passage",
376
+ "help": "Optional task selector for passage/document embeddings (Jina: retrieval.passage). If unset, no task field is sent.",
377
+ "advanced": true
378
+ },
379
+ "embedding.normalized": {
380
+ "label": "Normalized Embeddings",
381
+ "help": "Request normalized embeddings when the provider supports it (Jina v5). If unset, the field is not sent.",
382
+ "advanced": true
383
+ }
384
+ }
385
+ }
package/package.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "memory-lancedb-pro",
3
+ "version": "1.0.0",
4
+ "description": "OpenClaw enhanced LanceDB memory plugin with hybrid retrieval (Vector + BM25), cross-encoder rerank, multi-scope isolation, and management CLI",
5
+ "type": "module",
6
+ "main": "index.ts",
7
+ "keywords": [
8
+ "openclaw",
9
+ "openclaw-plugin",
10
+ "memory",
11
+ "lancedb",
12
+ "vector-search",
13
+ "bm25",
14
+ "hybrid-retrieval",
15
+ "rerank",
16
+ "ai-memory",
17
+ "long-term-memory"
18
+ ],
19
+ "repository": {
20
+ "type": "git",
21
+ "url": "https://github.com/win4r/memory-lancedb-pro"
22
+ },
23
+ "author": "win4r",
24
+ "license": "MIT",
25
+ "dependencies": {
26
+ "@lancedb/lancedb": "^0.26.2",
27
+ "@sinclair/typebox": "0.34.48",
28
+ "openai": "^6.21.0"
29
+ },
30
+ "openclaw": {
31
+ "extensions": [
32
+ "./index.ts"
33
+ ]
34
+ },
35
+ "devDependencies": {
36
+ "typescript": "^5.9.3"
37
+ }
38
+ }
@@ -0,0 +1,28 @@
1
+ ---
2
+ name: lesson
3
+ description: Store a lesson learned from the current conversation. Triggered by /lesson command. Use when Master signals that the recent conversation contains a pitfall, fix, or key insight that should be persisted to long-term memory.
4
+ ---
5
+
6
+ # Lesson Extraction & Storage
7
+
8
+ When triggered, extract and store lessons from the **recent conversation context**.
9
+
10
+ ## Steps
11
+
12
+ 1. **Scan recent context** — identify the pitfall, bug fix, or key insight just discussed
13
+ 2. **Store technical layer** (category: fact, importance ≥ 0.8):
14
+ ```
15
+ Pitfall: [symptom]. Cause: [root cause]. Fix: [solution]. Prevention: [how to avoid].
16
+ ```
17
+ 3. **Store principle layer** (category: decision, importance ≥ 0.85):
18
+ ```
19
+ Decision principle ([tag]): [behavioral rule]. Trigger: [when]. Action: [what to do].
20
+ ```
21
+ 4. **Verify** — run `memory_recall` with anchor keywords to confirm that both entries are retrievable
22
+ 5. **Report** — tell Master what was stored (brief summary)
23
+
24
+ ## Rules
25
+
26
+ - Keep entries short and atomic (< 500 chars each)
27
+ - If the lesson also affects a checklist or SKILL.md, update those files too
28
+ - If no clear lesson is found in recent context, ask Master what to store
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Adaptive Retrieval
3
+ * Determines whether a query needs memory retrieval at all.
4
+ * Skips retrieval for greetings, commands, simple instructions, and system messages.
5
+ * Saves embedding API calls and reduces noise injection.
6
+ */
7
+
8
/**
 * Patterns for queries that are clearly NOT memory-retrieval candidates.
 * Each pattern is tested against the trimmed query; one match is enough to skip.
 */
const SKIP_PATTERNS: readonly RegExp[] = [
  // Greetings & pleasantries
  /^(hi|hello|hey|good\s*(morning|afternoon|evening|night)|greetings|yo|sup|howdy|what'?s up)\b/i,
  // System/bot commands
  /^\//, // slash commands
  /^(run|build|test|ls|cd|git|npm|pip|docker|curl|cat|grep|find|make|sudo)\b/i,
  // Simple affirmations/negations
  /^(yes|no|yep|nope|ok|okay|sure|fine|thanks|thank you|thx|ty|got it|understood|cool|nice|great|good|perfect|awesome|👍|👎|✅|❌)\s*[.!]?$/i,
  // Continuation prompts
  /^(go ahead|continue|proceed|do it|start|begin|next|实施|开始|继续|好的|可以|行)\s*[.!]?$/i,
  // Pure emoji. The Unicode `Emoji` property also contains the ASCII digits,
  // '#' and '*', so the negative lookahead prevents input made up solely of
  // those characters (e.g. a long number) from being classified as emoji.
  /^(?![\d#*\s]+$)[\p{Emoji}\s]+$/u,
  // Heartbeat/system
  /^HEARTBEAT/i,
  /^\[System/i,
];

/**
 * Patterns for queries that SHOULD trigger retrieval even if they are short
 * or would otherwise match a skip pattern.
 */
const FORCE_RETRIEVE_PATTERNS: readonly RegExp[] = [
  /\b(remember|recall|forgot|memory|memories)\b/i,
  /\b(last time|before|previously|earlier|yesterday|ago)\b/i,
  /\b(my (name|email|phone|address|birthday|preference))\b/i,
  /\b(what did (i|we)|did i (tell|say|mention))\b/i,
  /(你记得|之前|上次|以前|还记得|提到过|说过)/i,
];

/**
 * Determine if a query should skip memory retrieval.
 *
 * Decision order (first rule that applies wins):
 *  1. Any FORCE_RETRIEVE_PATTERNS match        -> retrieve (returns false).
 *  2. Fewer than 5 UTF-16 code units           -> skip.
 *  3. Any SKIP_PATTERNS match                  -> skip.
 *  4. Shorter than 15 code units (6 when the text contains CJK characters)
 *     and not a question                       -> skip.
 *  5. Otherwise                                -> retrieve.
 *
 * @param query Raw user query; surrounding whitespace is ignored.
 * @returns `true` if retrieval should be skipped, `false` if it should run.
 */
export function shouldSkipRetrieval(query: string): boolean {
  const trimmed = query.trim();

  // Force retrieve if query has memory-related intent (checked FIRST,
  // before length check, so short CJK queries like "你记得吗" aren't skipped)
  if (FORCE_RETRIEVE_PATTERNS.some((p) => p.test(trimmed))) return false;

  // Too short to be meaningful
  if (trimmed.length < 5) return true;

  // Skip if matches any skip pattern
  if (SKIP_PATTERNS.some((p) => p.test(trimmed))) return true;

  // Skip very short non-question messages (likely commands or affirmations).
  // CJK characters carry more meaning per character, so use a lower threshold.
  const hasCJK = /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/.test(trimmed);
  const minLength = hasCJK ? 6 : 15;

  // A question is marked by either the ASCII '?' or the fullwidth '？'
  // (U+FF1F) used in CJK text. The escape keeps the fullwidth form intact
  // even if the file passes through tooling that normalises punctuation.
  const isQuestion = trimmed.includes('?') || trimmed.includes('\uFF1F');
  if (trimmed.length < minLength && !isQuestion) return true;

  // Default: do retrieve
  return false;
}