memory-lancedb-pro 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +489 -0
- package/README_CN.md +406 -0
- package/cli.ts +611 -0
- package/index.ts +698 -0
- package/openclaw.plugin.json +385 -0
- package/package.json +38 -0
- package/skills/lesson/SKILL.md +28 -0
- package/src/adaptive-retrieval.ts +60 -0
- package/src/embedder.ts +354 -0
- package/src/migrate.ts +356 -0
- package/src/noise-filter.ts +78 -0
- package/src/retriever.ts +722 -0
- package/src/scopes.ts +374 -0
- package/src/store.ts +567 -0
- package/src/tools.ts +639 -0
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "memory-lancedb-pro",
|
|
3
|
+
"name": "Memory (LanceDB Pro)",
|
|
4
|
+
"description": "Enhanced LanceDB-backed long-term memory with hybrid retrieval, multi-scope isolation, and management CLI",
|
|
5
|
+
"version": "2026.2.16",
|
|
6
|
+
"kind": "memory",
|
|
7
|
+
"configSchema": {
|
|
8
|
+
"type": "object",
|
|
9
|
+
"additionalProperties": false,
|
|
10
|
+
"properties": {
|
|
11
|
+
"embedding": {
|
|
12
|
+
"type": "object",
|
|
13
|
+
"additionalProperties": false,
|
|
14
|
+
"properties": {
|
|
15
|
+
"provider": {
|
|
16
|
+
"type": "string",
|
|
17
|
+
"const": "openai-compatible"
|
|
18
|
+
},
|
|
19
|
+
"apiKey": {
|
|
20
|
+
"type": "string"
|
|
21
|
+
},
|
|
22
|
+
"model": {
|
|
23
|
+
"type": "string"
|
|
24
|
+
},
|
|
25
|
+
"baseURL": {
|
|
26
|
+
"type": "string"
|
|
27
|
+
},
|
|
28
|
+
"dimensions": {
|
|
29
|
+
"type": "integer",
|
|
30
|
+
"minimum": 1
|
|
31
|
+
},
|
|
32
|
+
"taskQuery": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"description": "Embedding task for queries (provider-specific, e.g. Jina: retrieval.query)"
|
|
35
|
+
},
|
|
36
|
+
"taskPassage": {
|
|
37
|
+
"type": "string",
|
|
38
|
+
"description": "Embedding task for passages/documents (provider-specific, e.g. Jina: retrieval.passage)"
|
|
39
|
+
},
|
|
40
|
+
"normalized": {
|
|
41
|
+
"type": "boolean",
|
|
42
|
+
"description": "Request normalized embeddings when supported by the provider (e.g. Jina v5)"
|
|
43
|
+
}
|
|
44
|
+
},
|
|
45
|
+
"required": [
|
|
46
|
+
"apiKey"
|
|
47
|
+
]
|
|
48
|
+
},
|
|
49
|
+
"dbPath": {
|
|
50
|
+
"type": "string"
|
|
51
|
+
},
|
|
52
|
+
"enableManagementTools": {
|
|
53
|
+
"type": "boolean",
|
|
54
|
+
"default": false,
|
|
55
|
+
"description": "Enable memory_list and memory_stats management tools"
|
|
56
|
+
},
|
|
57
|
+
"autoCapture": {
|
|
58
|
+
"type": "boolean"
|
|
59
|
+
},
|
|
60
|
+
"autoRecall": {
|
|
61
|
+
"type": "boolean"
|
|
62
|
+
},
|
|
63
|
+
"captureAssistant": {
|
|
64
|
+
"type": "boolean"
|
|
65
|
+
},
|
|
66
|
+
"retrieval": {
|
|
67
|
+
"type": "object",
|
|
68
|
+
"additionalProperties": false,
|
|
69
|
+
"properties": {
|
|
70
|
+
"mode": {
|
|
71
|
+
"type": "string",
|
|
72
|
+
"enum": [
|
|
73
|
+
"hybrid",
|
|
74
|
+
"vector"
|
|
75
|
+
],
|
|
76
|
+
"default": "hybrid"
|
|
77
|
+
},
|
|
78
|
+
"vectorWeight": {
|
|
79
|
+
"type": "number",
|
|
80
|
+
"minimum": 0,
|
|
81
|
+
"maximum": 1,
|
|
82
|
+
"default": 0.7
|
|
83
|
+
},
|
|
84
|
+
"bm25Weight": {
|
|
85
|
+
"type": "number",
|
|
86
|
+
"minimum": 0,
|
|
87
|
+
"maximum": 1,
|
|
88
|
+
"default": 0.3
|
|
89
|
+
},
|
|
90
|
+
"minScore": {
|
|
91
|
+
"type": "number",
|
|
92
|
+
"minimum": 0,
|
|
93
|
+
"maximum": 1,
|
|
94
|
+
"default": 0.3
|
|
95
|
+
},
|
|
96
|
+
"rerank": {
|
|
97
|
+
"type": "string",
|
|
98
|
+
"enum": [
|
|
99
|
+
"cross-encoder",
|
|
100
|
+
"lightweight",
|
|
101
|
+
"none"
|
|
102
|
+
],
|
|
103
|
+
"default": "cross-encoder"
|
|
104
|
+
},
|
|
105
|
+
"rerankApiKey": {
|
|
106
|
+
"type": "string",
|
|
107
|
+
"description": "API key for reranker service (enables cross-encoder reranking)"
|
|
108
|
+
},
|
|
109
|
+
"rerankModel": {
|
|
110
|
+
"type": "string",
|
|
111
|
+
"default": "jina-reranker-v2-base-multilingual",
|
|
112
|
+
"description": "Reranker model name"
|
|
113
|
+
},
|
|
114
|
+
"rerankEndpoint": {
|
|
115
|
+
"type": "string",
|
|
116
|
+
"default": "https://api.jina.ai/v1/rerank",
|
|
117
|
+
"description": "Reranker API endpoint URL. Compatible with Jina, SiliconFlow, Pinecone, or any service with a similar interface."
|
|
118
|
+
},
|
|
119
|
+
"rerankProvider": {
|
|
120
|
+
"type": "string",
|
|
121
|
+
"enum": ["jina", "siliconflow", "pinecone"],
|
|
122
|
+
"default": "jina",
|
|
123
|
+
"description": "Reranker provider format. Determines request/response shape and auth header."
|
|
124
|
+
},
|
|
125
|
+
"candidatePoolSize": {
|
|
126
|
+
"type": "integer",
|
|
127
|
+
"minimum": 10,
|
|
128
|
+
"maximum": 100,
|
|
129
|
+
"default": 20
|
|
130
|
+
},
|
|
131
|
+
"recencyHalfLifeDays": {
|
|
132
|
+
"type": "number",
|
|
133
|
+
"minimum": 0,
|
|
134
|
+
"maximum": 365,
|
|
135
|
+
"default": 14,
|
|
136
|
+
"description": "Half-life in days for recency boost. Newer memories get higher scores. Set 0 to disable."
|
|
137
|
+
},
|
|
138
|
+
"recencyWeight": {
|
|
139
|
+
"type": "number",
|
|
140
|
+
"minimum": 0,
|
|
141
|
+
"maximum": 0.5,
|
|
142
|
+
"default": 0.1,
|
|
143
|
+
"description": "Maximum recency boost factor added to score"
|
|
144
|
+
},
|
|
145
|
+
"filterNoise": {
|
|
146
|
+
"type": "boolean",
|
|
147
|
+
"default": true,
|
|
148
|
+
"description": "Filter out noise memories (agent denials, meta-questions, boilerplate)"
|
|
149
|
+
},
|
|
150
|
+
"lengthNormAnchor": {
|
|
151
|
+
"type": "integer",
|
|
152
|
+
"minimum": 0,
|
|
153
|
+
"maximum": 5000,
|
|
154
|
+
"default": 500,
|
|
155
|
+
"description": "Length normalization anchor in chars. Entries longer than this get score penalized. Set 0 to disable."
|
|
156
|
+
},
|
|
157
|
+
"hardMinScore": {
|
|
158
|
+
"type": "number",
|
|
159
|
+
"minimum": 0,
|
|
160
|
+
"maximum": 1,
|
|
161
|
+
"default": 0.35,
|
|
162
|
+
"description": "Hard cutoff after all scoring stages. Results below this score are discarded."
|
|
163
|
+
},
|
|
164
|
+
"timeDecayHalfLifeDays": {
|
|
165
|
+
"type": "number",
|
|
166
|
+
"minimum": 0,
|
|
167
|
+
"maximum": 365,
|
|
168
|
+
"default": 60,
|
|
169
|
+
"description": "Time decay half-life in days. Old entries lose score gradually. Floor at 0.5x. Set 0 to disable."
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
},
|
|
173
|
+
"sessionMemory": {
|
|
174
|
+
"type": "object",
|
|
175
|
+
"additionalProperties": false,
|
|
176
|
+
"properties": {
|
|
177
|
+
"enabled": {
|
|
178
|
+
"type": "boolean",
|
|
179
|
+
"default": true,
|
|
180
|
+
"description": "Store session summaries to LanceDB on /new command (replaces built-in session-memory hook)"
|
|
181
|
+
},
|
|
182
|
+
"messageCount": {
|
|
183
|
+
"type": "integer",
|
|
184
|
+
"minimum": 1,
|
|
185
|
+
"maximum": 100,
|
|
186
|
+
"default": 15,
|
|
187
|
+
"description": "Number of recent messages to include in session summary"
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
},
|
|
191
|
+
"scopes": {
|
|
192
|
+
"type": "object",
|
|
193
|
+
"additionalProperties": false,
|
|
194
|
+
"properties": {
|
|
195
|
+
"default": {
|
|
196
|
+
"type": "string",
|
|
197
|
+
"default": "global"
|
|
198
|
+
},
|
|
199
|
+
"definitions": {
|
|
200
|
+
"type": "object",
|
|
201
|
+
"additionalProperties": {
|
|
202
|
+
"type": "object",
|
|
203
|
+
"properties": {
|
|
204
|
+
"description": {
|
|
205
|
+
"type": "string"
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
},
|
|
210
|
+
"agentAccess": {
|
|
211
|
+
"type": "object",
|
|
212
|
+
"additionalProperties": {
|
|
213
|
+
"type": "array",
|
|
214
|
+
"items": {
|
|
215
|
+
"type": "string"
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
},
|
|
222
|
+
"required": [
|
|
223
|
+
"embedding"
|
|
224
|
+
]
|
|
225
|
+
},
|
|
226
|
+
"uiHints": {
|
|
227
|
+
"embedding.apiKey": {
|
|
228
|
+
"label": "API Key",
|
|
229
|
+
"sensitive": true,
|
|
230
|
+
"placeholder": "sk-proj-... or ${GEMINI_API_KEY} or 'ollama'",
|
|
231
|
+
"help": "API key for the embedding provider (or use ${OPENAI_API_KEY}; use a dummy value for keyless local endpoints)"
|
|
232
|
+
},
|
|
233
|
+
"embedding.model": {
|
|
234
|
+
"label": "Embedding Model",
|
|
235
|
+
"placeholder": "text-embedding-3-small",
|
|
236
|
+
"help": "Embedding model name (e.g. text-embedding-3-small, gemini-embedding-001, nomic-embed-text)"
|
|
237
|
+
},
|
|
238
|
+
"embedding.baseURL": {
|
|
239
|
+
"label": "Base URL",
|
|
240
|
+
"placeholder": "https://api.openai.com/v1",
|
|
241
|
+
"help": "Custom base URL for OpenAI-compatible embedding endpoints (e.g. https://generativelanguage.googleapis.com/v1beta/openai/ for Gemini, http://localhost:11434/v1 for Ollama)",
|
|
242
|
+
"advanced": true
|
|
243
|
+
},
|
|
244
|
+
"embedding.dimensions": {
|
|
245
|
+
"label": "Vector Dimensions",
|
|
246
|
+
"placeholder": "auto-detected from model",
|
|
247
|
+
"help": "Override vector dimensions for custom models not in the built-in lookup table",
|
|
248
|
+
"advanced": true
|
|
249
|
+
},
|
|
250
|
+
"dbPath": {
|
|
251
|
+
"label": "Database Path",
|
|
252
|
+
"placeholder": "~/.openclaw/memory/lancedb-pro",
|
|
253
|
+
"help": "Directory path for the LanceDB database files",
|
|
254
|
+
"advanced": true
|
|
255
|
+
},
|
|
256
|
+
"autoCapture": {
|
|
257
|
+
"label": "Auto-Capture",
|
|
258
|
+
"help": "Automatically capture important information from conversations"
|
|
259
|
+
},
|
|
260
|
+
"autoRecall": {
|
|
261
|
+
"label": "Auto-Recall",
|
|
262
|
+
"help": "Automatically inject relevant memories into context"
|
|
263
|
+
},
|
|
264
|
+
"captureAssistant": {
|
|
265
|
+
"label": "Capture Assistant Messages",
|
|
266
|
+
"help": "Also auto-capture assistant messages (default false to reduce memory pollution)",
|
|
267
|
+
"advanced": true
|
|
268
|
+
},
|
|
269
|
+
"retrieval.mode": {
|
|
270
|
+
"label": "Retrieval Mode",
|
|
271
|
+
"help": "Use hybrid search (vector + BM25) or vector-only for backward compatibility",
|
|
272
|
+
"advanced": true
|
|
273
|
+
},
|
|
274
|
+
"retrieval.vectorWeight": {
|
|
275
|
+
"label": "Vector Search Weight",
|
|
276
|
+
"help": "Weight for vector similarity in hybrid search (0-1)",
|
|
277
|
+
"advanced": true
|
|
278
|
+
},
|
|
279
|
+
"retrieval.bm25Weight": {
|
|
280
|
+
"label": "BM25 Search Weight",
|
|
281
|
+
"help": "Weight for BM25 keyword search in hybrid search (0-1)",
|
|
282
|
+
"advanced": true
|
|
283
|
+
},
|
|
284
|
+
"retrieval.minScore": {
|
|
285
|
+
"label": "Minimum Score Threshold",
|
|
286
|
+
"help": "Drop results below this relevance score (0-1)",
|
|
287
|
+
"advanced": true
|
|
288
|
+
},
|
|
289
|
+
"retrieval.rerank": {
|
|
290
|
+
"label": "Reranking Mode",
|
|
291
|
+
"help": "Re-score fused results for better quality (cross-encoder uses configured reranker API)",
|
|
292
|
+
"advanced": true
|
|
293
|
+
},
|
|
294
|
+
"retrieval.rerankApiKey": {
|
|
295
|
+
"label": "Reranker API Key",
|
|
296
|
+
"sensitive": true,
|
|
297
|
+
"placeholder": "jina_... / sk-... / pcsk_...",
|
|
298
|
+
"help": "Reranker API key for cross-encoder reranking",
|
|
299
|
+
"advanced": true
|
|
300
|
+
},
|
|
301
|
+
"retrieval.rerankModel": {
|
|
302
|
+
"label": "Reranker Model",
|
|
303
|
+
"placeholder": "jina-reranker-v2-base-multilingual",
|
|
304
|
+
"help": "Reranker model name (e.g. jina-reranker-v2-base-multilingual, BAAI/bge-reranker-v2-m3)",
|
|
305
|
+
"advanced": true
|
|
306
|
+
},
|
|
307
|
+
"retrieval.rerankEndpoint": {
|
|
308
|
+
"label": "Reranker Endpoint",
|
|
309
|
+
"placeholder": "https://api.jina.ai/v1/rerank",
|
|
310
|
+
"help": "Custom reranker API endpoint URL",
|
|
311
|
+
"advanced": true
|
|
312
|
+
},
|
|
313
|
+
"retrieval.rerankProvider": {
|
|
314
|
+
"label": "Reranker Provider",
|
|
315
|
+
"help": "Provider format: jina (default), siliconflow, or pinecone",
|
|
316
|
+
"advanced": true
|
|
317
|
+
},
|
|
318
|
+
"retrieval.candidatePoolSize": {
|
|
319
|
+
"label": "Candidate Pool Size",
|
|
320
|
+
"help": "Number of candidates to fetch before fusion and reranking",
|
|
321
|
+
"advanced": true
|
|
322
|
+
},
|
|
323
|
+
"sessionMemory.enabled": {
|
|
324
|
+
"label": "Session Memory",
|
|
325
|
+
"help": "Store session summaries to LanceDB when /new is triggered (replaces built-in session-memory hook)"
|
|
326
|
+
},
|
|
327
|
+
"sessionMemory.messageCount": {
|
|
328
|
+
"label": "Session Message Count",
|
|
329
|
+
"help": "Number of recent messages to include in session summaries",
|
|
330
|
+
"advanced": true
|
|
331
|
+
},
|
|
332
|
+
"scopes.default": {
|
|
333
|
+
"label": "Default Scope",
|
|
334
|
+
"help": "Default memory scope for new memories",
|
|
335
|
+
"advanced": true
|
|
336
|
+
},
|
|
337
|
+
"scopes.definitions": {
|
|
338
|
+
"label": "Scope Definitions",
|
|
339
|
+
"help": "Define custom memory scopes with descriptions",
|
|
340
|
+
"advanced": true
|
|
341
|
+
},
|
|
342
|
+
"scopes.agentAccess": {
|
|
343
|
+
"label": "Agent Access Control",
|
|
344
|
+
"help": "Define which scopes each agent can access",
|
|
345
|
+
"advanced": true
|
|
346
|
+
},
|
|
347
|
+
"enableManagementTools": {
|
|
348
|
+
"label": "Management Tools",
|
|
349
|
+
"help": "Enable memory_list and memory_stats tools for debugging and auditing",
|
|
350
|
+
"advanced": true
|
|
351
|
+
},
|
|
352
|
+
"retrieval.lengthNormAnchor": {
|
|
353
|
+
"label": "Length Normalization Anchor",
|
|
354
|
+
"help": "Entries longer than this (chars) get score penalized to prevent long entries dominating. 0 = disabled.",
|
|
355
|
+
"advanced": true
|
|
356
|
+
},
|
|
357
|
+
"retrieval.hardMinScore": {
|
|
358
|
+
"label": "Hard Minimum Score",
|
|
359
|
+
"help": "Discard results below this score after all scoring stages. Higher = fewer but more relevant results.",
|
|
360
|
+
"advanced": true
|
|
361
|
+
},
|
|
362
|
+
"retrieval.timeDecayHalfLifeDays": {
|
|
363
|
+
"label": "Time Decay Half-Life",
|
|
364
|
+
"help": "Old entries lose score over this many days. Floor at 0.5x. 0 = disabled.",
|
|
365
|
+
"advanced": true
|
|
366
|
+
},
|
|
367
|
+
"embedding.taskQuery": {
|
|
368
|
+
"label": "Query Task",
|
|
369
|
+
"placeholder": "retrieval.query",
|
|
370
|
+
"help": "Optional task selector for query embeddings (Jina: retrieval.query). If unset, no task field is sent.",
|
|
371
|
+
"advanced": true
|
|
372
|
+
},
|
|
373
|
+
"embedding.taskPassage": {
|
|
374
|
+
"label": "Passage Task",
|
|
375
|
+
"placeholder": "retrieval.passage",
|
|
376
|
+
"help": "Optional task selector for passage/document embeddings (Jina: retrieval.passage). If unset, no task field is sent.",
|
|
377
|
+
"advanced": true
|
|
378
|
+
},
|
|
379
|
+
"embedding.normalized": {
|
|
380
|
+
"label": "Normalized Embeddings",
|
|
381
|
+
"help": "Request normalized embeddings when the provider supports it (Jina v5). If unset, the field is not sent.",
|
|
382
|
+
"advanced": true
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
}
|
package/package.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "memory-lancedb-pro",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "OpenClaw enhanced LanceDB memory plugin with hybrid retrieval (Vector + BM25), cross-encoder rerank, multi-scope isolation, and management CLI",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "index.ts",
|
|
7
|
+
"keywords": [
|
|
8
|
+
"openclaw",
|
|
9
|
+
"openclaw-plugin",
|
|
10
|
+
"memory",
|
|
11
|
+
"lancedb",
|
|
12
|
+
"vector-search",
|
|
13
|
+
"bm25",
|
|
14
|
+
"hybrid-retrieval",
|
|
15
|
+
"rerank",
|
|
16
|
+
"ai-memory",
|
|
17
|
+
"long-term-memory"
|
|
18
|
+
],
|
|
19
|
+
"repository": {
|
|
20
|
+
"type": "git",
|
|
21
|
+
"url": "https://github.com/win4r/memory-lancedb-pro"
|
|
22
|
+
},
|
|
23
|
+
"author": "win4r",
|
|
24
|
+
"license": "MIT",
|
|
25
|
+
"dependencies": {
|
|
26
|
+
"@lancedb/lancedb": "^0.26.2",
|
|
27
|
+
"@sinclair/typebox": "0.34.48",
|
|
28
|
+
"openai": "^6.21.0"
|
|
29
|
+
},
|
|
30
|
+
"openclaw": {
|
|
31
|
+
"extensions": [
|
|
32
|
+
"./index.ts"
|
|
33
|
+
]
|
|
34
|
+
},
|
|
35
|
+
"devDependencies": {
|
|
36
|
+
"typescript": "^5.9.3"
|
|
37
|
+
}
|
|
38
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: lesson
|
|
3
|
+
description: Store a lesson learned from the current conversation. Triggered by /lesson command. Use when Master signals that the recent conversation contains a pitfall, fix, or key insight that should be persisted to long-term memory.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Lesson Extraction & Storage
|
|
7
|
+
|
|
8
|
+
When triggered, extract and store lessons from the **recent conversation context**.
|
|
9
|
+
|
|
10
|
+
## Steps
|
|
11
|
+
|
|
12
|
+
1. **Scan recent context** — identify the pitfall, bug fix, or key insight just discussed
|
|
13
|
+
2. **Store technical layer** (category: fact, importance ≥ 0.8):
|
|
14
|
+
```
|
|
15
|
+
Pitfall: [symptom]. Cause: [root cause]. Fix: [solution]. Prevention: [how to avoid].
|
|
16
|
+
```
|
|
17
|
+
3. **Store principle layer** (category: decision, importance ≥ 0.85):
|
|
18
|
+
```
|
|
19
|
+
Decision principle ([tag]): [behavioral rule]. Trigger: [when]. Action: [what to do].
|
|
20
|
+
```
|
|
21
|
+
4. **Verify** — `memory_recall` with anchor keywords to confirm both entries are retrievable
|
|
22
|
+
5. **Report** — tell Master what was stored (brief summary)
|
|
23
|
+
|
|
24
|
+
## Rules
|
|
25
|
+
|
|
26
|
+
- Keep entries short and atomic (< 500 chars each)
|
|
27
|
+
- If the lesson also affects a checklist or SKILL.md, update those files too
|
|
28
|
+
- If no clear lesson is found in recent context, ask Master what to store
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adaptive Retrieval
|
|
3
|
+
* Determines whether a query needs memory retrieval at all.
|
|
4
|
+
* Skips retrieval for greetings, commands, simple instructions, and system messages.
|
|
5
|
+
* Saves embedding API calls and reduces noise injection.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Queries that are clearly NOT memory-retrieval candidates.
// Every pattern is tested against the trimmed query. None of them carries the
// `g` flag, so RegExp.prototype.test() keeps no state between calls.
const SKIP_PATTERNS: readonly RegExp[] = [
  // Greetings & pleasantries (anchored at the start only, so anything may follow)
  /^(hi|hello|hey|good\s*(morning|afternoon|evening|night)|greetings|yo|sup|howdy|what'?s up)\b/i,
  // System/bot commands
  /^\//, // slash commands
  /^(run|build|test|ls|cd|git|npm|pip|docker|curl|cat|grep|find|make|sudo)\b/i,
  // Simple affirmations/negations (whole message, optional trailing . or !)
  /^(yes|no|yep|nope|ok|okay|sure|fine|thanks|thank you|thx|ty|got it|understood|cool|nice|great|good|perfect|awesome|👍|👎|✅|❌)\s*[.!]?$/i,
  // Continuation prompts (whole message, optional trailing . or !)
  /^(go ahead|continue|proceed|do it|start|begin|next|实施|开始|继续|好的|可以|行)\s*[.!]?$/i,
  // Pure emoji.
  // The Unicode `Emoji` property also covers the ASCII digits, '#' and '*',
  // so the negative lookahead keeps messages made only of those characters
  // (e.g. a bare number) from being classified as "pure emoji".
  /^(?![\d#*\s]+$)[\p{Emoji}\s]+$/u,
  // Heartbeat/system
  /^HEARTBEAT/i,
  /^\[System/i,
];

// Queries that SHOULD trigger retrieval even if short.
const FORCE_RETRIEVE_PATTERNS: readonly RegExp[] = [
  /\b(remember|recall|forgot|memory|memories)\b/i,
  /\b(last time|before|previously|earlier|yesterday|ago)\b/i,
  /\b(my (name|email|phone|address|birthday|preference))\b/i,
  /\b(what did (i|we)|did i (tell|say|mention))\b/i,
  /(你记得|之前|上次|以前|还记得|提到过|说过)/i,
];

/**
 * Determine if a query should skip memory retrieval.
 *
 * Checks are applied to the trimmed query in this order:
 *  1. memory-related intent (FORCE_RETRIEVE_PATTERNS) -> never skip;
 *  2. fewer than 5 characters -> skip;
 *  3. greeting / command / affirmation / emoji / system (SKIP_PATTERNS) -> skip;
 *  4. short message that is not a question -> skip. The threshold is
 *     6 characters when the text contains CJK, 15 otherwise.
 *
 * @param query - Raw user message.
 * @returns `true` if retrieval should be skipped, `false` if it should run.
 */
export function shouldSkipRetrieval(query: string): boolean {
  const trimmed = query.trim();

  // Force retrieve if the query has memory-related intent (checked FIRST,
  // before the length check, so short CJK queries like "你记得吗" aren't skipped).
  if (FORCE_RETRIEVE_PATTERNS.some((pattern) => pattern.test(trimmed))) return false;

  // Too short to be meaningful.
  if (trimmed.length < 5) return true;

  // Skip if it matches any skip pattern.
  if (SKIP_PATTERNS.some((pattern) => pattern.test(trimmed))) return true;

  // Skip very short non-question messages (likely commands or affirmations).
  // CJK characters carry more meaning per character, so use a lower threshold.
  const hasCJK = /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/.test(trimmed);
  const minLength = hasCJK ? 6 : 15;

  // A question mark marks the message as a question. Accept both the ASCII
  // '?' and the full-width '？' (U+FF1F) that CJK input methods produce; the
  // latter is written as an escape so it survives character normalization.
  const isQuestion = trimmed.includes('?') || trimmed.includes('\uff1f');
  if (trimmed.length < minLength && !isQuestion) return true;

  // Default: do retrieve.
  return false;
}
|