claude-mem-lite 2.51.0 → 2.52.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@
10
10
  "plugins": [
11
11
  {
12
12
  "name": "claude-mem-lite",
13
- "version": "2.51.0",
13
+ "version": "2.52.0",
14
14
  "source": "./",
15
15
  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall"
16
16
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.51.0",
3
+ "version": "2.52.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall",
5
5
  "author": {
6
6
  "name": "sdsrss"
package/cli/doctor.mjs CHANGED
@@ -12,7 +12,32 @@ export async function cmdDoctor(db, args) {
12
12
  if (args.includes('--benchmark')) {
13
13
  const { runBenchmark } = await import('../lib/doctor-benchmark.mjs');
14
14
  const project = inferProject();
15
- const result = runBenchmark(db, { project });
15
+ // Sample recent user prompts so the CLI report has non-null injection_rate
16
+ // and hook latency. Without this, runBenchmark's prompts default of [] makes
17
+ // every metric 0/null — a dead command from the user's perspective. Tests
18
+ // bypass this CLI layer and call runBenchmark() directly, so the lib API
19
+ // contract (default prompts=[]) is unchanged.
20
+ let prompts = [];
21
+ try {
22
+ const limitIdx = args.indexOf('--prompts-limit');
23
+ let limit = 50;
24
+ if (limitIdx >= 0 && args[limitIdx + 1]) {
25
+ const parsed = parseInt(args[limitIdx + 1], 10);
26
+ if (Number.isFinite(parsed) && parsed > 0 && parsed <= 1000) limit = parsed;
27
+ }
28
+ const rows = db.prepare(`
29
+ SELECT p.prompt_text
30
+ FROM user_prompts p
31
+ JOIN sdk_sessions s ON p.content_session_id = s.content_session_id
32
+ WHERE s.project = ?
33
+ AND p.prompt_text IS NOT NULL
34
+ AND length(p.prompt_text) >= 15
35
+ ORDER BY p.created_at_epoch DESC
36
+ LIMIT ?
37
+ `).all(project, limit);
38
+ prompts = rows.map(r => r.prompt_text).filter(Boolean);
39
+ } catch { /* missing/empty tables on a fresh DB → leave prompts=[] */ }
40
+ const result = runBenchmark(db, { project, prompts });
16
41
  out(JSON.stringify(result, null, 2));
17
42
  return;
18
43
  }
package/hook.mjs CHANGED
@@ -27,6 +27,7 @@ import {
27
27
  extractErrorKeywords, extractFilePaths, isRelatedToEpisode,
28
28
  makeEntryDesc, scrubSecrets, EDIT_TOOLS, debugCatch, debugLog,
29
29
  COMPRESSED_AUTO, COMPRESSED_PENDING_PURGE, isoWeekKey, OBS_BM25,
30
+ computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity,
30
31
  } from './utils.mjs';
31
32
  import {
32
33
  readEpisodeRaw, episodeFile,
@@ -747,7 +748,8 @@ async function handleSessionStart() {
747
748
  `).run();
748
749
  if (boosted.changes > 0) debugLog('DEBUG', 'auto-maintain', `boosted ${boosted.changes} frequently-accessed observations`);
749
750
 
750
- // Auto-dedup: merge near-identical observations (same title, same project, within 1h)
751
+ // Auto-dedup (exact): merge identical-title observations within 1h.
752
+ // Catches rapid duplicate writes (same hook firing twice, race conditions).
751
753
  const dupPairs = db.prepare(`
752
754
  SELECT a.id as keep_id, b.id as remove_id
753
755
  FROM observations a
@@ -765,6 +767,55 @@ async function handleSessionStart() {
765
767
  debugLog('DEBUG', 'auto-maintain', `auto-deduped ${dupPairs.length} near-identical observations`);
766
768
  }
767
769
 
770
+ // Auto-dedup (fuzzy): catches near-identical titles that exact-match
771
+ // misses across larger time windows — e.g. episode-batch titles like
772
+ // "Modified A.mjs, B.mjs" vs "Modified B.mjs, A.mjs" written days apart.
773
+ // MinHash pre-filter (≥0.7) cuts the O(N²) scan; Jaccard ≥0.95 stays
774
+ // well clear of legit "two updates same area" pairs (those typically
775
+ // score 0.7–0.85, surfaced via `maintain scan` for manual review).
776
+ // Bounded by ${SCAN_LIMIT} recent rows × ${FUZZY_MAX_MERGES}-merge cap.
777
+ if (!process.env.CLAUDE_MEM_SKIP_AUTO_DEDUP_FUZZY) {
778
+ const SCAN_LIMIT = 500;
779
+ const FUZZY_MAX_MERGES = 20;
780
+ const FUZZY_THRESHOLD = 0.95;
781
+ const MINHASH_PREFILTER = 0.7;
782
+ const recent = db.prepare(`
783
+ SELECT id, title, importance, created_at_epoch
784
+ FROM observations
785
+ WHERE COALESCE(compressed_into, 0) = 0
786
+ AND superseded_at IS NULL
787
+ AND created_at_epoch > ?
788
+ AND title IS NOT NULL AND title != ''
789
+ ORDER BY created_at_epoch DESC LIMIT ${SCAN_LIMIT}
790
+ `).all(STALE_AGE);
791
+ if (recent.length >= 2) {
792
+ const titles = recent.map(r => r.title.trim());
793
+ const minhashes = titles.map(t => t ? computeMinHash(t) : null);
794
+ const fuzzyRemoveIds = [];
795
+ const removed = new Set();
796
+ outer: for (let i = 0; i < recent.length; i++) {
797
+ if (!minhashes[i] || removed.has(recent[i].id)) continue;
798
+ for (let j = i + 1; j < recent.length; j++) {
799
+ if (!minhashes[j] || removed.has(recent[j].id)) continue;
800
+ if (estimateJaccardFromMinHash(minhashes[i], minhashes[j]) < MINHASH_PREFILTER) continue;
801
+ if (jaccardSimilarity(titles[i], titles[j]) < FUZZY_THRESHOLD) continue;
802
+ // Keep the higher-importance row; on an importance tie the newer row wins
+ // (scan is created_at_epoch DESC, and >= keeps the earlier-scanned element).
+ // NOTE(review): if the intent was "older/lower id keeps its access history",
+ // the tiebreak would need to compare created_at_epoch/id explicitly.
803
+ const keep = (recent[i].importance ?? 1) >= (recent[j].importance ?? 1) ? recent[i] : recent[j];
804
+ const remove = keep === recent[i] ? recent[j] : recent[i];
805
+ fuzzyRemoveIds.push(remove.id);
806
+ removed.add(remove.id);
807
+ if (fuzzyRemoveIds.length >= FUZZY_MAX_MERGES) break outer;
808
+ }
809
+ }
810
+ if (fuzzyRemoveIds.length > 0) {
811
+ const ph = fuzzyRemoveIds.map(() => '?').join(',');
812
+ db.prepare(`UPDATE observations SET superseded_at = ?, superseded_by = 'auto-dedup-fuzzy' WHERE id IN (${ph})`)
813
+ .run(Date.now(), ...fuzzyRemoveIds);
814
+ debugLog('DEBUG', 'auto-maintain', `fuzzy auto-deduped ${fuzzyRemoveIds.length} near-identical observations`);
815
+ }
816
+ }
817
+ }
818
+
768
819
  // Mark maintenance as done (24h gate) — even though compression runs in background
769
820
  writeFileSync(maintainFile, JSON.stringify({ epoch: Date.now() }));
770
821
  // Weekly summary grouping runs in background to avoid blocking SessionStart
@@ -5,6 +5,7 @@
5
5
 
6
6
  import { notLowSignalTitleClause } from '../scoring-sql.mjs';
7
7
  import { truncate } from '../format-utils.mjs';
8
+ import { COMPRESSED_PENDING_PURGE } from '../utils.mjs';
8
9
 
9
10
  export function computeQualityStats(db, { project, days }) {
10
11
  const projectFilter = project ? 'AND project = ?' : '';
@@ -69,11 +70,22 @@ export function computeQualityStats(db, { project, days }) {
69
70
  LIMIT 5
70
71
  `).all(...baseParams);
71
72
 
72
- return { windowRow, allTimeRow, typeRows, topLessons, project, days };
73
+ // Pending-purge backlog: compressed records waiting on the time-based purge gate.
74
+ // High ratio signals push/pull imbalance — auto-mark fires daily but purge needs
75
+ // age > 37d, so a sudden write surge inflates this until the cohort ages out.
76
+ const purgeRow = db.prepare(`
77
+ SELECT
78
+ SUM(CASE WHEN compressed_into IS NOT NULL AND compressed_into != 0 THEN 1 ELSE 0 END) as compressed,
79
+ SUM(CASE WHEN compressed_into = ${COMPRESSED_PENDING_PURGE} THEN 1 ELSE 0 END) as pending_purge
80
+ FROM observations
81
+ WHERE 1=1 ${projectFilter}
82
+ `).get(...baseParams);
83
+
84
+ return { windowRow, allTimeRow, typeRows, topLessons, purgeRow, project, days };
73
85
  }
74
86
 
75
87
  export function formatQualityReport(data) {
76
- const { windowRow, allTimeRow, typeRows, topLessons, project, days } = data;
88
+ const { windowRow, allTimeRow, typeRows, topLessons, purgeRow, project, days } = data;
77
89
  const pct = (n, d) => d > 0 ? (100 * n / d).toFixed(1) : '0.0';
78
90
  const scope = project ? ` — ${project}` : '';
79
91
  const lines = [];
@@ -126,5 +138,16 @@ export function formatQualityReport(data) {
126
138
  lines.push(` ${lessonStatus} Lesson rate ≥ 15% → currently ${lessonPct}% (gap ${lessonGap >= 0 ? '+' : ''}${lessonGap}pp)`);
127
139
  lines.push(` ${noiseStatus} LOW_SIGNAL ≤ 30% → currently ${noisePct}% (gap ${noiseGap >= 0 ? '+' : ''}${noiseGap}pp)`);
128
140
 
141
+ // Pending-purge ratio: fraction of compressed records still waiting deletion.
142
+ // Compressed-but-not-yet-purged is normal (37d retention floor); a high ratio
143
+ // either means a recent write surge OR that auto-maintain isn't running.
144
+ if (purgeRow && (purgeRow.compressed ?? 0) > 0) {
145
+ const purgePct = pct(purgeRow.pending_purge, purgeRow.compressed);
146
+ const purgeNum = parseFloat(purgePct);
147
+ const purgeGap = (purgeNum - 10).toFixed(1);
148
+ const purgeStatus = purgeNum <= 10 ? '✅' : (purgeNum <= 30 ? '🟡' : '🔴');
149
+ lines.push(` ${purgeStatus} Pending purge ≤ 10% → currently ${purgePct}% (${purgeRow.pending_purge}/${purgeRow.compressed}) (gap ${purgeGap >= 0 ? '+' : ''}${purgeGap}pp)${purgeNum > 10 ? ' — run: claude-mem-lite maintain execute --ops purge_stale --confirm' : ''}`);
150
+ }
151
+
129
152
  return lines.join('\n');
130
153
  }
package/mem-cli.mjs CHANGED
@@ -412,6 +412,7 @@ function searchFts(db, ftsQuery, { type, project, limit, dateFrom, dateTo, minIm
412
412
  if (queryVec) {
413
413
  const vecResults = vectorSearch(db, queryVec, {
414
414
  project: project || null,
415
+ type: type || null,
415
416
  vocabVersion: vocab.version,
416
417
  limit: VECTOR_SCAN_LIMIT,
417
418
  });
@@ -420,9 +421,14 @@ function searchFts(db, ftsQuery, { type, project, limit, dateFrom, dateTo, minIm
420
421
  const rowMap = new Map(ftsRows.map(r => [r.id, r]));
421
422
  for (const vr of vecResults) {
422
423
  if (!rowMap.has(vr.id)) {
423
- const obs = db.prepare('SELECT id, type, title, subtitle, created_at, created_at_epoch, lesson_learned, importance, branch, files_modified FROM observations WHERE id = ?').get(vr.id);
424
+ const obs = db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, lesson_learned, importance, branch, files_modified FROM observations WHERE id = ?').get(vr.id);
424
425
  if (obs) {
425
- // Apply same filters as FTS5 query (aligned with MCP searchObservations)
426
+ // Apply same filters as FTS5 query (aligned with MCP searchObservations).
427
+ // Defense-in-depth: vectorSearch already filters type/project, but the
428
+ // post-filter keeps both gates symmetric so a future vectorSearch refactor
429
+ // can't silently leak across them (cf. #8162 paired-path lesson).
430
+ if (type && obs.type !== type) continue;
431
+ if (project && obs.project !== project) continue;
426
432
  if (dateFrom && obs.created_at_epoch < dateFrom) continue;
427
433
  if (dateTo && obs.created_at_epoch > dateTo) continue;
428
434
  if (minImportance && (obs.importance ?? 1) < minImportance) continue;
@@ -440,9 +446,11 @@ function searchFts(db, ftsQuery, { type, project, limit, dateFrom, dateTo, minIm
440
446
  .slice(0, limit);
441
447
  } else if (vecResults.length > 0 && ftsRows.length === 0) {
442
448
  return vecResults
443
- .map(vr => db.prepare('SELECT id, type, title, subtitle, created_at, created_at_epoch, lesson_learned, importance, branch FROM observations WHERE id = ?').get(vr.id))
449
+ .map(vr => db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, lesson_learned, importance, branch FROM observations WHERE id = ?').get(vr.id))
444
450
  .filter(obs => {
445
451
  if (!obs) return false;
452
+ if (type && obs.type !== type) return false;
453
+ if (project && obs.project !== project) return false;
446
454
  if (dateFrom && obs.created_at_epoch < dateFrom) return false;
447
455
  if (dateTo && obs.created_at_epoch > dateTo) return false;
448
456
  if (minImportance && (obs.importance ?? 1) < minImportance) return false;
@@ -2002,7 +2010,11 @@ Commands:
2002
2010
  timeline Show observations around an anchor (shows recent if no anchor)
2003
2011
  --anchor ID Center on this ID. Accepts N, #N, P#N, or S#N — P#/S# anchors
2004
2012
  resolve to the nearest-in-time observation in the same project.
2005
- --query "text" Find anchor by FTS5 search
2013
+ --query "text" Find anchor by FTS5 search. Ranks by BM25 × time-decay,
2014
+ so multi-term queries surface the BEST topical match
2015
+ (highest term coverage), not the most recent. For
2016
+ "recent activity around X", use 'recent' or
2017
+ 'search "X" --sort time' instead.
2006
2018
  --before N Show N before anchor (default 5)
2007
2019
  --after N Show N after anchor (default 5)
2008
2020
  --project P Filter by project
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.51.0",
3
+ "version": "2.52.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code",
5
5
  "type": "module",
6
6
  "engines": {
package/tool-schemas.mjs CHANGED
@@ -318,11 +318,13 @@ export const tools = [
318
318
  name: 'mem_timeline',
319
319
  description:
320
320
  'Show observations before and after an anchor point (by ID or by FTS query).\n' +
321
+ 'Query-anchor ranks by BM25 × time-decay → BEST topical match, not most recent.\n' +
321
322
  '\n' +
322
323
  'DO NOT use when:\n' +
323
324
  ' - You only want one record (use mem_get)\n' +
324
325
  ' - You have no anchor in mind and are just browsing (use mem_recent or mem_browse)\n' +
325
326
  ' - The sequence is obvious from commit history (use git log)\n' +
327
+ ' - You want "recent activity around X" (use mem_recent or mem_search sort="time")\n' +
326
328
  '\n' +
327
329
  'USE when:\n' +
328
330
  ' - Reconstructing what led up to / followed a specific bug or decision\n' +