memory-lancedb-pro 1.0.11 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.0.12
4
+
5
+ - Fix: ghost memories stuck in autoRecall after deletion (#15). BM25-only results from stale FTS index are now validated via `store.hasId()` before inclusion in fused results. Removed the BM25-only floor score of 0.5 that allowed deleted entries to survive `hardMinScore` filtering.
6
+ - Fix: HEARTBEAT pattern now matches anywhere in the prompt (not just at start), preventing autoRecall from triggering on prefixed HEARTBEAT messages.
7
+ - Add: `autoRecallMinLength` config option to set a custom minimum prompt length for autoRecall (default: 15 chars English, 6 CJK). Prompts shorter than this threshold are skipped.
8
+ - Add: `ping`, `pong`, `test`, `debug` added to skip patterns in adaptive retrieval.
9
+
3
10
  ## 1.0.11
4
11
 
5
12
  - Change: set `autoRecall` default to `false` to avoid the model echoing injected `<relevant-memories>` blocks.
package/index.ts CHANGED
@@ -37,6 +37,7 @@ interface PluginConfig {
37
37
  dbPath?: string;
38
38
  autoCapture?: boolean;
39
39
  autoRecall?: boolean;
40
+ autoRecallMinLength?: number;
40
41
  captureAssistant?: boolean;
41
42
  retrieval?: {
42
43
  mode?: "hybrid" | "vector";
@@ -200,7 +201,7 @@ async function readSessionMessages(filePath: string, messageCount: number): Prom
200
201
  }
201
202
  }
202
203
  }
203
- } catch {}
204
+ } catch { }
204
205
  }
205
206
 
206
207
  if (messages.length === 0) return null;
@@ -225,7 +226,7 @@ async function readSessionContentWithResetFallback(sessionFilePath: string, mess
225
226
  const latestResetPath = join(dir, resetCandidates[resetCandidates.length - 1]);
226
227
  return await readSessionMessages(latestResetPath, messageCount);
227
228
  }
228
- } catch {}
229
+ } catch { }
229
230
 
230
231
  return primary;
231
232
  }
@@ -264,7 +265,7 @@ async function findPreviousSessionFile(sessionsDir: string, currentSessionFile?:
264
265
  .sort().reverse();
265
266
  if (nonReset.length > 0) return join(sessionsDir, nonReset[0]);
266
267
  }
267
- } catch {}
268
+ } catch { }
268
269
  }
269
270
 
270
271
  // ============================================================================
@@ -367,7 +368,7 @@ const memoryLanceDBProPlugin = {
367
368
  // Default is OFF to prevent the model from accidentally echoing injected context.
368
369
  if (config.autoRecall === true) {
369
370
  api.on("before_agent_start", async (event, ctx) => {
370
- if (!event.prompt || shouldSkipRetrieval(event.prompt)) {
371
+ if (!event.prompt || shouldSkipRetrieval(event.prompt, config.autoRecallMinLength)) {
371
372
  return;
372
373
  }
373
374
 
@@ -623,7 +624,7 @@ const memoryLanceDBProPlugin = {
623
624
  if (files.length > 7) {
624
625
  const { unlink } = await import("node:fs/promises");
625
626
  for (const old of files.slice(0, files.length - 7)) {
626
- await unlink(join(backupDir, old)).catch(() => {});
627
+ await unlink(join(backupDir, old)).catch(() => { });
627
628
  }
628
629
  }
629
630
 
@@ -664,10 +665,10 @@ const memoryLanceDBProPlugin = {
664
665
 
665
666
  api.logger.info(
666
667
  `memory-lancedb-pro: initialized successfully ` +
667
- `(embedding: ${embedTest.success ? "OK" : "FAIL"}, ` +
668
- `retrieval: ${retrievalTest.success ? "OK" : "FAIL"}, ` +
669
- `mode: ${retrievalTest.mode}, ` +
670
- `FTS: ${retrievalTest.hasFtsSupport ? "enabled" : "disabled"})`
668
+ `(embedding: ${embedTest.success ? "OK" : "FAIL"}, ` +
669
+ `retrieval: ${retrievalTest.success ? "OK" : "FAIL"}, ` +
670
+ `mode: ${retrievalTest.mode}, ` +
671
+ `FTS: ${retrievalTest.hasFtsSupport ? "enabled" : "disabled"})`
671
672
  );
672
673
 
673
674
  if (!embedTest.success) {
@@ -701,54 +702,55 @@ const memoryLanceDBProPlugin = {
701
702
  };
702
703
 
703
704
  function parsePluginConfig(value: unknown): PluginConfig {
704
- if (!value || typeof value !== "object" || Array.isArray(value)) {
705
- throw new Error("memory-lancedb-pro config required");
706
- }
707
- const cfg = value as Record<string, unknown>;
705
+ if (!value || typeof value !== "object" || Array.isArray(value)) {
706
+ throw new Error("memory-lancedb-pro config required");
707
+ }
708
+ const cfg = value as Record<string, unknown>;
708
709
 
709
- const embedding = cfg.embedding as Record<string, unknown> | undefined;
710
- if (!embedding) {
711
- throw new Error("embedding config is required");
712
- }
710
+ const embedding = cfg.embedding as Record<string, unknown> | undefined;
711
+ if (!embedding) {
712
+ throw new Error("embedding config is required");
713
+ }
713
714
 
714
- const apiKey = typeof embedding.apiKey === "string"
715
- ? embedding.apiKey
716
- : process.env.OPENAI_API_KEY || "";
715
+ const apiKey = typeof embedding.apiKey === "string"
716
+ ? embedding.apiKey
717
+ : process.env.OPENAI_API_KEY || "";
717
718
 
718
- if (!apiKey) {
719
- throw new Error("embedding.apiKey is required (set directly or via OPENAI_API_KEY env var)");
720
- }
719
+ if (!apiKey) {
720
+ throw new Error("embedding.apiKey is required (set directly or via OPENAI_API_KEY env var)");
721
+ }
721
722
 
722
- return {
723
- embedding: {
724
- provider: "openai-compatible",
725
- apiKey,
726
- model: typeof embedding.model === "string" ? embedding.model : "text-embedding-3-small",
727
- baseURL: typeof embedding.baseURL === "string" ? resolveEnvVars(embedding.baseURL) : undefined,
728
- // Accept number, numeric string, or env-var string (e.g. "${EMBED_DIM}").
729
- // Also accept legacy top-level `dimensions` for convenience.
730
- dimensions: parsePositiveInt(embedding.dimensions ?? cfg.dimensions),
731
- taskQuery: typeof embedding.taskQuery === "string" ? embedding.taskQuery : undefined,
732
- taskPassage: typeof embedding.taskPassage === "string" ? embedding.taskPassage : undefined,
733
- normalized: typeof embedding.normalized === "boolean" ? embedding.normalized : undefined,
734
- },
735
- dbPath: typeof cfg.dbPath === "string" ? cfg.dbPath : undefined,
736
- autoCapture: cfg.autoCapture !== false,
737
- // Default OFF: only enable when explicitly set to true.
738
- autoRecall: cfg.autoRecall === true,
739
- captureAssistant: cfg.captureAssistant === true,
740
- retrieval: typeof cfg.retrieval === "object" && cfg.retrieval !== null ? cfg.retrieval as any : undefined,
741
- scopes: typeof cfg.scopes === "object" && cfg.scopes !== null ? cfg.scopes as any : undefined,
742
- enableManagementTools: cfg.enableManagementTools === true,
743
- sessionMemory: typeof cfg.sessionMemory === "object" && cfg.sessionMemory !== null
744
- ? {
745
- enabled: (cfg.sessionMemory as Record<string, unknown>).enabled !== false,
746
- messageCount: typeof (cfg.sessionMemory as Record<string, unknown>).messageCount === "number"
747
- ? (cfg.sessionMemory as Record<string, unknown>).messageCount as number
748
- : undefined,
749
- }
750
- : undefined,
751
- };
723
+ return {
724
+ embedding: {
725
+ provider: "openai-compatible",
726
+ apiKey,
727
+ model: typeof embedding.model === "string" ? embedding.model : "text-embedding-3-small",
728
+ baseURL: typeof embedding.baseURL === "string" ? resolveEnvVars(embedding.baseURL) : undefined,
729
+ // Accept number, numeric string, or env-var string (e.g. "${EMBED_DIM}").
730
+ // Also accept legacy top-level `dimensions` for convenience.
731
+ dimensions: parsePositiveInt(embedding.dimensions ?? cfg.dimensions),
732
+ taskQuery: typeof embedding.taskQuery === "string" ? embedding.taskQuery : undefined,
733
+ taskPassage: typeof embedding.taskPassage === "string" ? embedding.taskPassage : undefined,
734
+ normalized: typeof embedding.normalized === "boolean" ? embedding.normalized : undefined,
735
+ },
736
+ dbPath: typeof cfg.dbPath === "string" ? cfg.dbPath : undefined,
737
+ autoCapture: cfg.autoCapture !== false,
738
+ // Default OFF: only enable when explicitly set to true.
739
+ autoRecall: cfg.autoRecall === true,
740
+ autoRecallMinLength: parsePositiveInt(cfg.autoRecallMinLength),
741
+ captureAssistant: cfg.captureAssistant === true,
742
+ retrieval: typeof cfg.retrieval === "object" && cfg.retrieval !== null ? cfg.retrieval as any : undefined,
743
+ scopes: typeof cfg.scopes === "object" && cfg.scopes !== null ? cfg.scopes as any : undefined,
744
+ enableManagementTools: cfg.enableManagementTools === true,
745
+ sessionMemory: typeof cfg.sessionMemory === "object" && cfg.sessionMemory !== null
746
+ ? {
747
+ enabled: (cfg.sessionMemory as Record<string, unknown>).enabled !== false,
748
+ messageCount: typeof (cfg.sessionMemory as Record<string, unknown>).messageCount === "number"
749
+ ? (cfg.sessionMemory as Record<string, unknown>).messageCount as number
750
+ : undefined,
751
+ }
752
+ : undefined,
753
+ };
752
754
  }
753
755
 
754
756
  export default memoryLanceDBProPlugin;
package/openclaw.plugin.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "id": "memory-lancedb-pro",
3
3
  "name": "Memory (LanceDB Pro)",
4
4
  "description": "Enhanced LanceDB-backed long-term memory with hybrid retrieval, multi-scope isolation, and management CLI",
5
- "version": "1.0.11",
5
+ "version": "1.0.12",
6
6
  "kind": "memory",
7
7
  "configSchema": {
8
8
  "type": "object",
@@ -61,6 +61,13 @@
61
61
  "type": "boolean",
62
62
  "default": false
63
63
  },
64
+ "autoRecallMinLength": {
65
+ "type": "integer",
66
+ "minimum": 1,
67
+ "maximum": 200,
68
+ "default": 15,
69
+ "description": "Minimum prompt length (in characters) to trigger auto-recall. Non-question prompts shorter than this are skipped. When set, this value replaces the built-in thresholds (15 for English, 6 for CJK) for all prompts."
70
+ },
64
71
  "captureAssistant": {
65
72
  "type": "boolean"
66
73
  },
@@ -266,6 +273,11 @@
266
273
  "label": "Auto-Recall",
267
274
  "help": "Automatically inject relevant memories into context"
268
275
  },
276
+ "autoRecallMinLength": {
277
+ "label": "Auto-Recall Min Length",
278
+ "help": "Minimum prompt length to trigger auto-recall (shorter prompts are skipped). When unset, the built-in thresholds apply: 15 chars for English, 6 for CJK.",
279
+ "advanced": true
280
+ },
269
281
  "captureAssistant": {
270
282
  "label": "Capture Assistant Messages",
271
283
  "help": "Also auto-capture assistant messages (default false to reduce memory pollution)",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "memory-lancedb-pro",
3
- "version": "1.0.11",
3
+ "version": "1.0.12",
4
4
  "description": "OpenClaw enhanced LanceDB memory plugin with hybrid retrieval (Vector + BM25), cross-encoder rerank, multi-scope isolation, and management CLI",
5
5
  "type": "module",
6
6
  "main": "index.ts",
@@ -40,4 +40,4 @@
40
40
  "jiti": "^2.6.0",
41
41
  "typescript": "^5.9.3"
42
42
  }
43
- }
43
+ }
package/src/adaptive-retrieval.ts CHANGED
@@ -18,9 +18,11 @@ const SKIP_PATTERNS = [
18
18
  /^(go ahead|continue|proceed|do it|start|begin|next|实施|开始|继续|好的|可以|行)\s*[.!]?$/i,
19
19
  // Pure emoji
20
20
  /^[\p{Emoji}\s]+$/u,
21
- // Heartbeat/system
22
- /^HEARTBEAT/i,
21
+ // Heartbeat/system (match anywhere, not just at start, to handle prefixed formats)
22
+ /HEARTBEAT/i,
23
23
  /^\[System/i,
24
+ // Single-word utility pings
25
+ /^(ping|pong|test|debug)\s*[.!?]?$/i,
24
26
  ];
25
27
 
26
28
  // Queries that SHOULD trigger retrieval even if short
@@ -61,8 +63,10 @@ function normalizeQuery(query: string): string {
61
63
  /**
62
64
  * Determine if a query should skip memory retrieval.
63
65
  * Returns true if retrieval should be skipped.
66
+ * @param query The raw prompt text
67
+ * @param minLength Optional minimum length override (if set, overrides built-in thresholds)
64
68
  */
65
- export function shouldSkipRetrieval(query: string): boolean {
69
+ export function shouldSkipRetrieval(query: string, minLength?: number): boolean {
66
70
  const trimmed = normalizeQuery(query);
67
71
 
68
72
  // Force retrieve if query has memory-related intent (checked FIRST,
@@ -75,11 +79,17 @@ export function shouldSkipRetrieval(query: string): boolean {
75
79
  // Skip if matches any skip pattern
76
80
  if (SKIP_PATTERNS.some(p => p.test(trimmed))) return true;
77
81
 
82
+ // If caller provides a custom minimum length, use it
83
+ if (minLength !== undefined && minLength > 0) {
84
+ if (trimmed.length < minLength && !trimmed.includes('?') && !trimmed.includes('？')) return true;
85
+ return false;
86
+ }
87
+
78
88
  // Skip very short non-question messages (likely commands or affirmations)
79
89
  // CJK characters carry more meaning per character, so use a lower threshold
80
90
  const hasCJK = /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/.test(trimmed);
81
- const minLength = hasCJK ? 6 : 15;
82
- if (trimmed.length < minLength && !trimmed.includes('?') && !trimmed.includes('？')) return true;
91
+ const defaultMinLength = hasCJK ? 6 : 15;
92
+ if (trimmed.length < defaultMinLength && !trimmed.includes('?') && !trimmed.includes('？')) return true;
83
93
 
84
94
  // Default: do retrieve
85
95
  return false;
package/src/retriever.ts CHANGED
@@ -282,8 +282,8 @@ export class MemoryRetriever {
282
282
  this.runBM25Search(query, candidatePoolSize, scopeFilter, category),
283
283
  ]);
284
284
 
285
- // Fuse results using RRF
286
- const fusedResults = this.fuseResults(vectorResults, bm25Results);
285
+ // Fuse results using RRF (async: validates BM25-only entries exist in store)
286
+ const fusedResults = await this.fuseResults(vectorResults, bm25Results);
287
287
 
288
288
  // Apply minimum score threshold
289
289
  const filtered = fusedResults.filter(r => r.score >= this.config.minScore);
@@ -357,10 +357,10 @@ export class MemoryRetriever {
357
357
  }));
358
358
  }
359
359
 
360
- private fuseResults(
360
+ private async fuseResults(
361
361
  vectorResults: Array<MemorySearchResult & { rank: number }>,
362
362
  bm25Results: Array<MemorySearchResult & { rank: number }>
363
- ): RetrievalResult[] {
363
+ ): Promise<RetrievalResult[]> {
364
364
  // Create maps for quick lookup
365
365
  const vectorMap = new Map<string, MemorySearchResult & { rank: number }>();
366
366
  const bm25Map = new Map<string, MemorySearchResult & { rank: number }>();
@@ -383,6 +383,18 @@ export class MemoryRetriever {
383
383
  const vectorResult = vectorMap.get(id);
384
384
  const bm25Result = bm25Map.get(id);
385
385
 
386
+ // FIX(#15): BM25-only results may be "ghost" entries whose vector data was
387
+ // deleted but whose FTS index entry lingers until the next index rebuild.
388
+ // Validate that the entry actually exists in the store before including it.
389
+ if (!vectorResult && bm25Result) {
390
+ try {
391
+ const exists = await this.store.hasId(id);
392
+ if (!exists) continue; // Skip ghost entry
393
+ } catch {
394
+ // If hasId fails, keep the result (fail-open)
395
+ }
396
+ }
397
+
386
398
  // Use the result with more complete data (prefer vector result if both exist)
387
399
  const baseResult = vectorResult || bm25Result!;
388
400
 
@@ -392,12 +404,12 @@ export class MemoryRetriever {
392
404
  const bm25Hit = bm25Result ? 1 : 0;
393
405
 
394
406
  // Base = vector score; BM25 hit boosts by up to 15%
395
- // BM25-only results use their normalized score (floor 0.5) so exact keyword
396
- // matches aren't buried — e.g. searching "JINA_API_KEY" should surface even
397
- // when vector distance is large.
407
+ // BM25-only results use their raw BM25 score so exact keyword matches
408
+ // (e.g. searching "JINA_API_KEY") still surface. The previous floor of 0.5
409
+ // was too generous and allowed ghost entries to survive hardMinScore (0.35).
398
410
  const fusedScore = vectorResult
399
411
  ? clamp01(vectorScore + (bm25Hit * 0.15 * vectorScore), 0.1)
400
- : clamp01(Math.max(bm25Result!.score, 0.5), 0.1);
412
+ : clamp01(bm25Result!.score, 0.1);
401
413
 
402
414
  fusedResults.push({
403
415
  entry: baseResult.entry,