claude-mem-lite 2.51.0 → 2.52.0
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/cli/doctor.mjs +26 -1
- package/hook.mjs +52 -1
- package/lib/stats-quality.mjs +25 -2
- package/mem-cli.mjs +16 -4
- package/package.json +1 -1
- package/tool-schemas.mjs +2 -0
package/cli/doctor.mjs
CHANGED
@@ -12,7 +12,32 @@ export async function cmdDoctor(db, args) {
   if (args.includes('--benchmark')) {
     const { runBenchmark } = await import('../lib/doctor-benchmark.mjs');
     const project = inferProject();
-
+    // Sample recent user prompts so the CLI report has non-null injection_rate
+    // and hook latency. Without this, runBenchmark's prompts default of [] makes
+    // every metric 0/null — a dead command from the user's perspective. Tests
+    // bypass this CLI layer and call runBenchmark() directly, so the lib API
+    // contract (default prompts=[]) is unchanged.
+    let prompts = [];
+    try {
+      const limitIdx = args.indexOf('--prompts-limit');
+      let limit = 50;
+      if (limitIdx >= 0 && args[limitIdx + 1]) {
+        const parsed = parseInt(args[limitIdx + 1], 10);
+        if (Number.isFinite(parsed) && parsed > 0 && parsed <= 1000) limit = parsed;
+      }
+      const rows = db.prepare(`
+        SELECT p.prompt_text
+        FROM user_prompts p
+        JOIN sdk_sessions s ON p.content_session_id = s.content_session_id
+        WHERE s.project = ?
+          AND p.prompt_text IS NOT NULL
+          AND length(p.prompt_text) >= 15
+        ORDER BY p.created_at_epoch DESC
+        LIMIT ?
+      `).all(project, limit);
+      prompts = rows.map(r => r.prompt_text).filter(Boolean);
+    } catch { /* missing/empty tables on a fresh DB → leave prompts=[] */ }
+    const result = runBenchmark(db, { project, prompts });
     out(JSON.stringify(result, null, 2));
     return;
   }
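
Note: a minimal sketch of exercising the new sampling path end to end. It assumes better-sqlite3 as the driver (the synchronous prepare/.all/.get/.run style above is consistent with it), an illustrative database path, and an import path relative to the unpacked package; none of these come from this diff.

// Hypothetical harness, not part of the package: opens the store the way the
// CLI would and drives cmdDoctor with the flags added in this release.
import Database from 'better-sqlite3';            // assumed driver
import { cmdDoctor } from './cli/doctor.mjs';     // export shown in the hunk header above

const db = new Database(process.env.CLAUDE_MEM_DB ?? './claude-mem.sqlite'); // illustrative path
await cmdDoctor(db, ['--benchmark', '--prompts-limit', '100']);
// With up to 100 recent prompts sampled, the printed JSON report should carry
// non-null injection_rate / hook-latency figures instead of the old 0/null.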
package/hook.mjs
CHANGED
@@ -27,6 +27,7 @@ import {
   extractErrorKeywords, extractFilePaths, isRelatedToEpisode,
   makeEntryDesc, scrubSecrets, EDIT_TOOLS, debugCatch, debugLog,
   COMPRESSED_AUTO, COMPRESSED_PENDING_PURGE, isoWeekKey, OBS_BM25,
+  computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity,
 } from './utils.mjs';
 import {
   readEpisodeRaw, episodeFile,
@@ -747,7 +748,8 @@ async function handleSessionStart() {
   `).run();
   if (boosted.changes > 0) debugLog('DEBUG', 'auto-maintain', `boosted ${boosted.changes} frequently-accessed observations`);

-  // Auto-dedup: merge
+  // Auto-dedup (exact): merge identical-title observations within 1h.
+  // Catches rapid duplicate writes (same hook firing twice, race conditions).
   const dupPairs = db.prepare(`
     SELECT a.id as keep_id, b.id as remove_id
     FROM observations a
@@ -765,6 +767,55 @@ async function handleSessionStart() {
     debugLog('DEBUG', 'auto-maintain', `auto-deduped ${dupPairs.length} near-identical observations`);
   }

+  // Auto-dedup (fuzzy): catches near-identical titles that exact-match
+  // misses across larger time windows — e.g. episode-batch titles like
+  // "Modified A.mjs, B.mjs" vs "Modified B.mjs, A.mjs" written days apart.
+  // MinHash pre-filter (≥0.7) cuts the O(N²) scan; Jaccard ≥0.95 stays
+  // well clear of legit "two updates same area" pairs (those typically
+  // score 0.7–0.85, surfaced via `maintain scan` for manual review).
+  // Bounded by ${SCAN_LIMIT} recent rows × ${FUZZY_MAX_MERGES}-merge cap.
+  if (!process.env.CLAUDE_MEM_SKIP_AUTO_DEDUP_FUZZY) {
+    const SCAN_LIMIT = 500;
+    const FUZZY_MAX_MERGES = 20;
+    const FUZZY_THRESHOLD = 0.95;
+    const MINHASH_PREFILTER = 0.7;
+    const recent = db.prepare(`
+      SELECT id, title, importance, created_at_epoch
+      FROM observations
+      WHERE COALESCE(compressed_into, 0) = 0
+        AND superseded_at IS NULL
+        AND created_at_epoch > ?
+        AND title IS NOT NULL AND title != ''
+      ORDER BY created_at_epoch DESC LIMIT ${SCAN_LIMIT}
+    `).all(STALE_AGE);
+    if (recent.length >= 2) {
+      const titles = recent.map(r => r.title.trim());
+      const minhashes = titles.map(t => t ? computeMinHash(t) : null);
+      const fuzzyRemoveIds = [];
+      const removed = new Set();
+      outer: for (let i = 0; i < recent.length; i++) {
+        if (!minhashes[i] || removed.has(recent[i].id)) continue;
+        for (let j = i + 1; j < recent.length; j++) {
+          if (!minhashes[j] || removed.has(recent[j].id)) continue;
+          if (estimateJaccardFromMinHash(minhashes[i], minhashes[j]) < MINHASH_PREFILTER) continue;
+          if (jaccardSimilarity(titles[i], titles[j]) < FUZZY_THRESHOLD) continue;
+          // Keep the higher-importance row; tiebreak by older (lower id wins access history)
+          const keep = (recent[i].importance ?? 1) >= (recent[j].importance ?? 1) ? recent[i] : recent[j];
+          const remove = keep === recent[i] ? recent[j] : recent[i];
+          fuzzyRemoveIds.push(remove.id);
+          removed.add(remove.id);
+          if (fuzzyRemoveIds.length >= FUZZY_MAX_MERGES) break outer;
+        }
+      }
+      if (fuzzyRemoveIds.length > 0) {
+        const ph = fuzzyRemoveIds.map(() => '?').join(',');
+        db.prepare(`UPDATE observations SET superseded_at = ?, superseded_by = 'auto-dedup-fuzzy' WHERE id IN (${ph})`)
+          .run(Date.now(), ...fuzzyRemoveIds);
+        debugLog('DEBUG', 'auto-maintain', `fuzzy auto-deduped ${fuzzyRemoveIds.length} near-identical observations`);
+      }
+    }
+  }
+
   // Mark maintenance as done (24h gate) — even though compression runs in background
   writeFileSync(maintainFile, JSON.stringify({ epoch: Date.now() }));
   // Weekly summary grouping runs in background to avoid blocking SessionStart
package/lib/stats-quality.mjs
CHANGED
@@ -5,6 +5,7 @@

 import { notLowSignalTitleClause } from '../scoring-sql.mjs';
 import { truncate } from '../format-utils.mjs';
+import { COMPRESSED_PENDING_PURGE } from '../utils.mjs';

 export function computeQualityStats(db, { project, days }) {
   const projectFilter = project ? 'AND project = ?' : '';
@@ -69,11 +70,22 @@ export function computeQualityStats(db, { project, days }) {
     LIMIT 5
   `).all(...baseParams);

-
+  // Pending-purge backlog: compressed records waiting on the time-based purge gate.
+  // High ratio signals push/pull imbalance — auto-mark fires daily but purge needs
+  // age > 37d, so a sudden write surge inflates this until the cohort ages out.
+  const purgeRow = db.prepare(`
+    SELECT
+      SUM(CASE WHEN compressed_into IS NOT NULL AND compressed_into != 0 THEN 1 ELSE 0 END) as compressed,
+      SUM(CASE WHEN compressed_into = ${COMPRESSED_PENDING_PURGE} THEN 1 ELSE 0 END) as pending_purge
+    FROM observations
+    WHERE 1=1 ${projectFilter}
+  `).get(...baseParams);
+
+  return { windowRow, allTimeRow, typeRows, topLessons, purgeRow, project, days };
 }

 export function formatQualityReport(data) {
-  const { windowRow, allTimeRow, typeRows, topLessons, project, days } = data;
+  const { windowRow, allTimeRow, typeRows, topLessons, purgeRow, project, days } = data;
   const pct = (n, d) => d > 0 ? (100 * n / d).toFixed(1) : '0.0';
   const scope = project ? ` — ${project}` : '';
   const lines = [];
@@ -126,5 +138,16 @@ export function formatQualityReport(data) {
   lines.push(` ${lessonStatus} Lesson rate ≥ 15% → currently ${lessonPct}% (gap ${lessonGap >= 0 ? '+' : ''}${lessonGap}pp)`);
   lines.push(` ${noiseStatus} LOW_SIGNAL ≤ 30% → currently ${noisePct}% (gap ${noiseGap >= 0 ? '+' : ''}${noiseGap}pp)`);

+  // Pending-purge ratio: fraction of compressed records still waiting deletion.
+  // Compressed-but-not-yet-purged is normal (37d retention floor); a high ratio
+  // either means a recent write surge OR that auto-maintain isn't running.
+  if (purgeRow && (purgeRow.compressed ?? 0) > 0) {
+    const purgePct = pct(purgeRow.pending_purge, purgeRow.compressed);
+    const purgeNum = parseFloat(purgePct);
+    const purgeGap = (purgeNum - 10).toFixed(1);
+    const purgeStatus = purgeNum <= 10 ? '✅' : (purgeNum <= 30 ? '🟡' : '🔴');
+    lines.push(` ${purgeStatus} Pending purge ≤ 10% → currently ${purgePct}% (${purgeRow.pending_purge}/${purgeRow.compressed}) (gap ${purgeGap >= 0 ? '+' : ''}${purgeGap}pp)${purgeNum > 10 ? ' — run: claude-mem-lite maintain execute --ops purge_stale --confirm' : ''}`);
+  }
+
   return lines.join('\n');
 }
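
Note: the new report line reuses the same gap/status pattern as the lesson-rate and LOW_SIGNAL lines. A worked example with made-up counts, mirroring the threshold logic added above:

// Illustrative numbers only; the logic is the same as in the diff.
const purgeRow = { compressed: 200, pending_purge: 45 };                       // hypothetical counts
const purgePct = (100 * purgeRow.pending_purge / purgeRow.compressed).toFixed(1); // '22.5'
const purgeNum = parseFloat(purgePct);
const purgeStatus = purgeNum <= 10 ? '✅' : (purgeNum <= 30 ? '🟡' : '🔴');      // '🟡'
// Since 22.5 > 10, the report also appends the remediation hint:
//   run: claude-mem-lite maintain execute --ops purge_stale --confirm
// At 8% the line would read ✅ with no hint; above 30% it turns 🔴.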
package/mem-cli.mjs
CHANGED
@@ -412,6 +412,7 @@ function searchFts(db, ftsQuery, { type, project, limit, dateFrom, dateTo, minIm
   if (queryVec) {
     const vecResults = vectorSearch(db, queryVec, {
       project: project || null,
+      type: type || null,
       vocabVersion: vocab.version,
       limit: VECTOR_SCAN_LIMIT,
     });
@@ -420,9 +421,14 @@ function searchFts(db, ftsQuery, { type, project, limit, dateFrom, dateTo, minIm
     const rowMap = new Map(ftsRows.map(r => [r.id, r]));
     for (const vr of vecResults) {
       if (!rowMap.has(vr.id)) {
-        const obs = db.prepare('SELECT id, type, title, subtitle, created_at, created_at_epoch, lesson_learned, importance, branch, files_modified FROM observations WHERE id = ?').get(vr.id);
+        const obs = db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, lesson_learned, importance, branch, files_modified FROM observations WHERE id = ?').get(vr.id);
         if (obs) {
-          // Apply same filters as FTS5 query (aligned with MCP searchObservations)
+          // Apply same filters as FTS5 query (aligned with MCP searchObservations).
+          // Defense-in-depth: vectorSearch already filters type/project, but the
+          // post-filter keeps both gates symmetric so a future vectorSearch refactor
+          // can't silently leak across them (cf. #8162 paired-path lesson).
+          if (type && obs.type !== type) continue;
+          if (project && obs.project !== project) continue;
           if (dateFrom && obs.created_at_epoch < dateFrom) continue;
           if (dateTo && obs.created_at_epoch > dateTo) continue;
           if (minImportance && (obs.importance ?? 1) < minImportance) continue;
@@ -440,9 +446,11 @@ function searchFts(db, ftsQuery, { type, project, limit, dateFrom, dateTo, minIm
       .slice(0, limit);
   } else if (vecResults.length > 0 && ftsRows.length === 0) {
     return vecResults
-      .map(vr => db.prepare('SELECT id, type, title, subtitle, created_at, created_at_epoch, lesson_learned, importance, branch FROM observations WHERE id = ?').get(vr.id))
+      .map(vr => db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, lesson_learned, importance, branch FROM observations WHERE id = ?').get(vr.id))
       .filter(obs => {
        if (!obs) return false;
+        if (type && obs.type !== type) return false;
+        if (project && obs.project !== project) return false;
        if (dateFrom && obs.created_at_epoch < dateFrom) return false;
        if (dateTo && obs.created_at_epoch > dateTo) return false;
        if (minImportance && (obs.importance ?? 1) < minImportance) return false;
@@ -2002,7 +2010,11 @@ Commands:
   timeline            Show observations around an anchor (shows recent if no anchor)
     --anchor ID       Center on this ID. Accepts N, #N, P#N, or S#N — P#/S# anchors
                       resolve to the nearest-in-time observation in the same project.
-    --query "text"    Find anchor by FTS5 search
+    --query "text"    Find anchor by FTS5 search. Ranks by BM25 × time-decay,
+                      so multi-term queries surface the BEST topical match
+                      (highest term coverage), not the most recent. For
+                      "recent activity around X", use 'recent' or
+                      'search "X" --sort time' instead.
     --before N        Show N before anchor (default 5)
     --after N         Show N after anchor (default 5)
     --project P       Filter by project
package/package.json
CHANGED
package/tool-schemas.mjs
CHANGED
@@ -318,11 +318,13 @@ export const tools = [
     name: 'mem_timeline',
     description:
       'Show observations before and after an anchor point (by ID or by FTS query).\n' +
+      'Query-anchor ranks by BM25 × time-decay → BEST topical match, not most recent.\n' +
       '\n' +
       'DO NOT use when:\n' +
       ' - You only want one record (use mem_get)\n' +
       ' - You have no anchor in mind and are just browsing (use mem_recent or mem_browse)\n' +
       ' - The sequence is obvious from commit history (use git log)\n' +
+      ' - You want "recent activity around X" (use mem_recent or mem_search sort="time")\n' +
       '\n' +
       'USE when:\n' +
       ' - Reconstructing what led up to / followed a specific bug or decision\n' +