npm - claude-mem-lite - Versions diffs - 2.51.0 → 2.53.0 - Mend

claude-mem-lite 2.51.0 → 2.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/.claude-plugin/marketplace.json +1 -1
package/.claude-plugin/plugin.json +1 -1
package/cli/doctor.mjs +26 -1
package/hook-context.mjs +9 -4
package/hook.mjs +52 -1
package/lib/stats-quality.mjs +25 -2
package/mem-cli.mjs +100 -197
package/package.json +2 -1
package/search-engine.mjs +249 -0
package/server.mjs +9 -236
package/source-files.mjs +1 -1
package/tool-schemas.mjs +2 -0

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -10,7 +10,7 @@
   "plugins": [
     {
       "name": "claude-mem-lite",
-      "version": "2.51.0",
+      "version": "2.53.0",
       "source": "./",
       "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall"
     }

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-mem-lite",
-  "version": "2.51.0",
+  "version": "2.53.0",
   "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall",
   "author": {
     "name": "sdsrss"

package/cli/doctor.mjs CHANGED Viewed

@@ -12,7 +12,32 @@ export async function cmdDoctor(db, args) {
   if (args.includes('--benchmark')) {
     const { runBenchmark } = await import('../lib/doctor-benchmark.mjs');
     const project = inferProject();
-    const result = runBenchmark(db, { project });
+    // Sample recent user prompts so the CLI report has non-null injection_rate
+    // and hook latency. Without this, runBenchmark's prompts default of [] makes
+    // every metric 0/null — a dead command from the user's perspective. Tests
+    // bypass this CLI layer and call runBenchmark() directly, so the lib API
+    // contract (default prompts=[]) is unchanged.
+    let prompts = [];
+    try {
+      const limitIdx = args.indexOf('--prompts-limit');
+      let limit = 50;
+      if (limitIdx >= 0 && args[limitIdx + 1]) {
+        const parsed = parseInt(args[limitIdx + 1], 10);
+        if (Number.isFinite(parsed) && parsed > 0 && parsed <= 1000) limit = parsed;
+      }
+      const rows = db.prepare(`
+        SELECT p.prompt_text
+        FROM user_prompts p
+        JOIN sdk_sessions s ON p.content_session_id = s.content_session_id
+        WHERE s.project = ?
+          AND p.prompt_text IS NOT NULL
+          AND length(p.prompt_text) >= 15
+        ORDER BY p.created_at_epoch DESC
+        LIMIT ?
+      `).all(project, limit);
+      prompts = rows.map(r => r.prompt_text).filter(Boolean);
+    } catch { /* missing/empty tables on a fresh DB → leave prompts=[] */ }
+    const result = runBenchmark(db, { project, prompts });
     out(JSON.stringify(result, null, 2));
     return;
   }

package/hook-context.mjs CHANGED Viewed

@@ -369,19 +369,24 @@ export function buildSessionContextLines(db, project, now = new Date(), currentC
   // 5. Working state from latest /clear handoff.
   // Session scoping: when currentCcSessionId is provided, restrict to this session's
   // own clear handoff so parallel sessions don't see each other's Working State block.
+  // TTL: drop handoffs older than 48h. Without it, `cmdContext` (no session id) would
+  // surface a /clear from days ago as "current Working State" — confusing when the user
+  // has long moved on. 48h covers overnight breaks but excludes truly stale state.
+  const HANDOFF_TTL_MS = 48 * 60 * 60 * 1000;
+  const handoffMinEpoch = Date.now() - HANDOFF_TTL_MS;
   const prevClearHandoff = currentCcSessionId
     ? db.prepare(`
         SELECT working_on, unfinished, key_files
         FROM session_handoffs
-        WHERE project = ? AND type = 'clear' AND session_id = ?
+        WHERE project = ? AND type = 'clear' AND session_id = ? AND created_at_epoch > ?
         ORDER BY created_at_epoch DESC LIMIT 1
-      `).get(project, currentCcSessionId)
+      `).get(project, currentCcSessionId, handoffMinEpoch)
     : db.prepare(`
         SELECT working_on, unfinished, key_files
         FROM session_handoffs
-        WHERE project = ? AND type = 'clear'
+        WHERE project = ? AND type = 'clear' AND created_at_epoch > ?
         ORDER BY created_at_epoch DESC LIMIT 1
-      `).get(project);
+      `).get(project, handoffMinEpoch);
   const handoffLines = [];
   if (prevClearHandoff) {

package/hook.mjs CHANGED Viewed

@@ -27,6 +27,7 @@ import {
   extractErrorKeywords, extractFilePaths, isRelatedToEpisode,
   makeEntryDesc, scrubSecrets, EDIT_TOOLS, debugCatch, debugLog,
   COMPRESSED_AUTO, COMPRESSED_PENDING_PURGE, isoWeekKey, OBS_BM25,
+  computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity,
 } from './utils.mjs';
 import {
   readEpisodeRaw, episodeFile,
@@ -747,7 +748,8 @@ async function handleSessionStart() {
         `).run();
         if (boosted.changes > 0) debugLog('DEBUG', 'auto-maintain', `boosted ${boosted.changes} frequently-accessed observations`);
-        // Auto-dedup: merge near-identical observations (same title, same project, within 1h)
+        // Auto-dedup (exact): merge identical-title observations within 1h.
+        // Catches rapid duplicate writes (same hook firing twice, race conditions).
         const dupPairs = db.prepare(`
           SELECT a.id as keep_id, b.id as remove_id
           FROM observations a
@@ -765,6 +767,55 @@ async function handleSessionStart() {
           debugLog('DEBUG', 'auto-maintain', `auto-deduped ${dupPairs.length} near-identical observations`);
         }
+        // Auto-dedup (fuzzy): catches near-identical titles that exact-match
+        // misses across larger time windows — e.g. episode-batch titles like
+        // "Modified A.mjs, B.mjs" vs "Modified B.mjs, A.mjs" written days apart.
+        // MinHash pre-filter (≥0.7) cuts the O(N²) scan; Jaccard ≥0.95 stays
+        // well clear of legit "two updates same area" pairs (those typically
+        // score 0.7–0.85, surfaced via `maintain scan` for manual review).
+        // Bounded by SCAN_LIMIT (500) most-recent rows and a FUZZY_MAX_MERGES (20) merge cap per run.
+        if (!process.env.CLAUDE_MEM_SKIP_AUTO_DEDUP_FUZZY) {
+          const SCAN_LIMIT = 500;
+          const FUZZY_MAX_MERGES = 20;
+          const FUZZY_THRESHOLD = 0.95;
+          const MINHASH_PREFILTER = 0.7;
+          const recent = db.prepare(`
+            SELECT id, title, importance, created_at_epoch
+            FROM observations
+            WHERE COALESCE(compressed_into, 0) = 0
+              AND superseded_at IS NULL
+              AND created_at_epoch > ?
+              AND title IS NOT NULL AND title != ''
+            ORDER BY created_at_epoch DESC LIMIT ${SCAN_LIMIT}
+          `).all(STALE_AGE);
+          if (recent.length >= 2) {
+            const titles = recent.map(r => r.title.trim());
+            const minhashes = titles.map(t => t ? computeMinHash(t) : null);
+            const fuzzyRemoveIds = [];
+            const removed = new Set();
+            outer: for (let i = 0; i < recent.length; i++) {
+              if (!minhashes[i] || removed.has(recent[i].id)) continue;
+              for (let j = i + 1; j < recent.length; j++) {
+                if (!minhashes[j] || removed.has(recent[j].id)) continue;
+                if (estimateJaccardFromMinHash(minhashes[i], minhashes[j]) < MINHASH_PREFILTER) continue;
+                if (jaccardSimilarity(titles[i], titles[j]) < FUZZY_THRESHOLD) continue;
+                // Keep the higher-importance row; on a tie recent[i] is kept, which is the NEWER row (rows are ordered created_at_epoch DESC), not the older one.
+                const keep = (recent[i].importance ?? 1) >= (recent[j].importance ?? 1) ? recent[i] : recent[j];
+                const remove = keep === recent[i] ? recent[j] : recent[i];
+                fuzzyRemoveIds.push(remove.id);
+                removed.add(remove.id);
+                if (fuzzyRemoveIds.length >= FUZZY_MAX_MERGES) break outer;
+              }
+            }
+            if (fuzzyRemoveIds.length > 0) {
+              const ph = fuzzyRemoveIds.map(() => '?').join(',');
+              db.prepare(`UPDATE observations SET superseded_at = ?, superseded_by = 'auto-dedup-fuzzy' WHERE id IN (${ph})`)
+                .run(Date.now(), ...fuzzyRemoveIds);
+              debugLog('DEBUG', 'auto-maintain', `fuzzy auto-deduped ${fuzzyRemoveIds.length} near-identical observations`);
+            }
+          }
+        }
         // Mark maintenance as done (24h gate) — even though compression runs in background
         writeFileSync(maintainFile, JSON.stringify({ epoch: Date.now() }));
         // Weekly summary grouping runs in background to avoid blocking SessionStart

package/lib/stats-quality.mjs CHANGED Viewed

@@ -5,6 +5,7 @@
 import { notLowSignalTitleClause } from '../scoring-sql.mjs';
 import { truncate } from '../format-utils.mjs';
+import { COMPRESSED_PENDING_PURGE } from '../utils.mjs';
 export function computeQualityStats(db, { project, days }) {
   const projectFilter = project ? 'AND project = ?' : '';
@@ -69,11 +70,22 @@ export function computeQualityStats(db, { project, days }) {
     LIMIT 5
   `).all(...baseParams);
-  return { windowRow, allTimeRow, typeRows, topLessons, project, days };
+  // Pending-purge backlog: compressed records waiting on the time-based purge gate.
+  // High ratio signals push/pull imbalance — auto-mark fires daily but purge needs
+  // age > 37d, so a sudden write surge inflates this until the cohort ages out.
+  const purgeRow = db.prepare(`
+    SELECT
+      SUM(CASE WHEN compressed_into IS NOT NULL AND compressed_into != 0 THEN 1 ELSE 0 END) as compressed,
+      SUM(CASE WHEN compressed_into = ${COMPRESSED_PENDING_PURGE} THEN 1 ELSE 0 END) as pending_purge
+    FROM observations
+    WHERE 1=1 ${projectFilter}
+  `).get(...baseParams);
+  return { windowRow, allTimeRow, typeRows, topLessons, purgeRow, project, days };
 }
 export function formatQualityReport(data) {
-  const { windowRow, allTimeRow, typeRows, topLessons, project, days } = data;
+  const { windowRow, allTimeRow, typeRows, topLessons, purgeRow, project, days } = data;
   const pct = (n, d) => d > 0 ? (100 * n / d).toFixed(1) : '0.0';
   const scope = project ? ` — ${project}` : '';
   const lines = [];
@@ -126,5 +138,16 @@ export function formatQualityReport(data) {
   lines.push(`    ${lessonStatus} Lesson rate ≥ 15%    → currently ${lessonPct}%  (gap ${lessonGap >= 0 ? '+' : ''}${lessonGap}pp)`);
   lines.push(`    ${noiseStatus} LOW_SIGNAL  ≤ 30%    → currently ${noisePct}%  (gap ${noiseGap >= 0 ? '+' : ''}${noiseGap}pp)`);
+  // Pending-purge ratio: fraction of compressed records still waiting deletion.
+  // Compressed-but-not-yet-purged is normal (37d retention floor); a high ratio
+  // either means a recent write surge OR that auto-maintain isn't running.
+  if (purgeRow && (purgeRow.compressed ?? 0) > 0) {
+    const purgePct = pct(purgeRow.pending_purge, purgeRow.compressed);
+    const purgeNum = parseFloat(purgePct);
+    const purgeGap = (purgeNum - 10).toFixed(1);
+    const purgeStatus = purgeNum <= 10 ? '✅' : (purgeNum <= 30 ? '🟡' : '🔴');
+    lines.push(`    ${purgeStatus} Pending purge ≤ 10%  → currently ${purgePct}% (${purgeRow.pending_purge}/${purgeRow.compressed})  (gap ${purgeGap >= 0 ? '+' : ''}${purgeGap}pp)${purgeNum > 10 ? ' — run: claude-mem-lite maintain execute --ops purge_stale --confirm' : ''}`);
+  }
   return lines.join('\n');
 }

package/mem-cli.mjs CHANGED Viewed

@@ -4,13 +4,14 @@
 import { homedir } from 'os';
 import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
-import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, jaccardSimilarity, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, isoWeekKey, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, DEFAULT_DECAY_HALF_LIFE_MS, getCurrentBranch, notLowSignalTitleClause, LOW_SIGNAL_TITLE } from './utils.mjs';
+import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, jaccardSimilarity, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, isoWeekKey, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, DEFAULT_DECAY_HALF_LIFE_MS, getCurrentBranch, notLowSignalTitleClause } from './utils.mjs';
 import { cjkPrecisionOk } from './nlp.mjs';
 import { extractCjkLikePatterns } from './nlp.mjs';
 import { resolveProject } from './project-utils.mjs';
 import { computeTier, TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
-import { getVocabulary, computeVector, vectorSearch, rrfMerge, VECTOR_SCAN_LIMIT, rebuildVocabulary, _resetVocabCache } from './tfidf.mjs';
-import { autoBoostIfNeeded, reRankWithContext, markSuperseded, extractPRFTerms, expandQueryByConcepts } from './server-internals.mjs';
+import { getVocabulary, computeVector, rebuildVocabulary, _resetVocabCache } from './tfidf.mjs';
+import { autoBoostIfNeeded, reRankWithContext, markSuperseded } from './server-internals.mjs';
+import { searchObservationsHybrid } from './search-engine.mjs';
 import { ensureRegistryDb, upsertResource } from './registry.mjs';
 import { searchResources } from './registry-retriever.mjs';
 import { optimizePreview, optimizeRun } from './hook-optimize.mjs';
@@ -72,6 +73,7 @@ function cmdSearch(db, args) {
   // error logs, etc.) which are otherwise filtered from default search. Use for auditing or
   // when explicitly searching for a file/command that produced a degraded title.
   const includeNoise = flags['include-noise'] === true || flags['include-noise'] === 'true';
+  const jsonOutput = flags.json === true || flags.json === 'true';
   if (source && !['observations', 'sessions', 'prompts'].includes(source)) {
     fail(`[mem] Invalid --source "${source}". Use: observations, sessions, prompts`);
@@ -94,87 +96,42 @@ function cmdSearch(db, args) {
   // When --type/--tier/--importance (obs-only fields) is specified, implicitly restrict to observations
   const effectiveSource = source || ((type || tier || minImportance) ? 'observations' : null);
+  // Cross-source mode: each source needs more candidates than the final limit
+  // so the post-merge sort has room to pick the best from each (paired-path with
+  // server.mjs:377 — without this, obs gets systematically squeezed out by sessions).
+  const isCrossSourceMode = !effectiveSource;
+  const perSourceLimit = isCrossSourceMode ? Math.max(limit * 3, offset + limit + 10) : limit;
+  const perSourceOffset = isCrossSourceMode ? 0 : offset;
   const results = [];
   // Tracks whether AND returned 0 and OR recovered non-empty. Mirrors server.mjs
   // ctx.orFallbackFired so the header can surface a "(relaxed AND→OR)" hint.
   let orFallbackFired = false;
-  // Search observations
+  // Search observations — shared engine with server.mjs (#8198/#8212 paired-path fix)
   if (!effectiveSource || effectiveSource === 'observations') {
-    let obsRows = searchFts(db, ftsQuery, { type, project, limit, dateFrom, dateTo, minImportance, branch, includeNoise, offset: effectiveSource ? offset : 0 });
-    if (obsRows.length === 0) {
-      const orQuery = relaxFtsQueryToOr(ftsQuery);
-      if (orQuery) {
-        try {
-          obsRows = searchFts(db, orQuery, { type, project, limit, dateFrom, dateTo, minImportance, branch, includeNoise, offset: effectiveSource ? offset : 0 });
-          if (obsRows.length > 0) orFallbackFired = true;
-        } catch {}
-      }
-    }
-    // Type-list fallback
-    if (obsRows.length === 0 && type) {
-      const typeWheres = ['COALESCE(compressed_into, 0) = 0', 'superseded_at IS NULL', 'type = ?'];
-      const typeParams = [type];
-      if (project) { typeWheres.push('project = ?'); typeParams.push(project); }
-      if (dateFrom) { typeWheres.push('created_at_epoch >= ?'); typeParams.push(dateFrom); }
-      if (dateTo) { typeWheres.push('created_at_epoch <= ?'); typeParams.push(dateTo); }
-      if (minImportance) { typeWheres.push('COALESCE(importance, 1) >= ?'); typeParams.push(minImportance); }
-      if (branch) { typeWheres.push('branch = ?'); typeParams.push(branch); }
-      typeParams.push(limit);
-      obsRows = db.prepare(`
-        SELECT id, type, title, subtitle, created_at, lesson_learned
-        FROM observations
-        WHERE ${typeWheres.join(' AND ')}
-        ORDER BY created_at_epoch DESC
-        LIMIT ?
-      `).all(...typeParams);
-    }
-    for (const r of obsRows) results.push({ ...r, _source: 'obs', score: r.score ?? 0 });
-    // Concept co-occurrence + PRF expansion (aligned with MCP searchObservations)
-    if (obsRows.length > 0 && results.filter(r => r._source === 'obs').length < Math.ceil(limit / 2)) {
-      const existingIds = new Set(results.filter(r => r._source === 'obs').map(r => r.id));
-      // Concept co-occurrence expansion
-      const expanded = expandQueryByConcepts(db, ftsQuery, project || null);
-      if (expanded.length > 0) {
-        const expansionFts = expanded.map(c => `"${c.replace(/"/g, '""')}"`).join(' OR ');
-        try {
-          const expRows = searchFts(db, expansionFts, { type, project, limit, dateFrom, dateTo, minImportance, branch, includeNoise, offset: 0 });
-          for (const r of expRows) {
-            if (!existingIds.has(r.id)) {
-              existingIds.add(r.id);
-              results.push({ ...r, _source: 'obs', score: (r.score ?? 0) * 0.7 });
-            }
-          }
-        } catch { /* expansion is best-effort */ }
-      }
-      // PRF expansion (only if ≥3 primary results)
-      if (obsRows.length >= 3) {
-        const topResults = db.prepare(`
-          SELECT o.title, o.narrative FROM observations_fts
-          JOIN observations o ON observations_fts.rowid = o.id
-          WHERE observations_fts MATCH ? AND COALESCE(o.compressed_into, 0) = 0
-            AND (? IS NULL OR o.project = ?)
-          ORDER BY ${OBS_BM25}
-          LIMIT 8
-        `).all(ftsQuery, project ?? null, project ?? null);
-        const prfTerms = extractPRFTerms(topResults, ftsQuery);
-        if (prfTerms.length > 0) {
-          const prfFts = prfTerms.map(t => `"${t.replace(/"/g, '""')}"`).join(' OR ');
-          try {
-            const prfRows = searchFts(db, prfFts, { type, project, limit, dateFrom, dateTo, minImportance, branch, includeNoise, offset: 0 });
-            for (const r of prfRows) {
-              if (!existingIds.has(r.id)) {
-                existingIds.add(r.id);
-                results.push({ ...r, _source: 'obs', score: (r.score ?? 0) * 0.6 });
-              }
-            }
-          } catch { /* PRF is best-effort */ }
-        }
-      }
-    }
-    // Tier post-filter — applied to ALL obs results (initial + expansion + PRF)
+    const obsCtx = {
+      ftsQuery,
+      args: {
+        project: project || null,
+        obs_type: type || null,
+        importance: minImportance || null,
+        branch: branch || null,
+        include_noise: includeNoise,
+      },
+      epochFrom: dateFrom,
+      epochTo: dateTo,
+      perSourceLimit,
+      perSourceOffset,
+      currentProject: project ? null : inferProject(),
+      limit,
+      orFallbackFired: false,
+    };
+    const obsResults = searchObservationsHybrid(db, obsCtx);
+    if (obsCtx.orFallbackFired) orFallbackFired = true;
+    for (const r of obsResults) results.push({ ...r, _source: 'obs', score: r.score ?? 0 });
+    // Tier post-filter — applied to ALL obs results from the engine.
     if (tier) {
       const obsInResults = results.filter(r => r._source === 'obs');
       if (obsInResults.length > 0) {
@@ -205,7 +162,7 @@ function cmdSearch(db, args) {
     if (project) { sessWheres.push('s.project = ?'); sessParams.push(project); }
     if (dateFrom) { sessWheres.push('s.created_at_epoch >= ?'); sessParams.push(dateFrom); }
     if (dateTo) { sessWheres.push('s.created_at_epoch <= ?'); sessParams.push(dateTo); }
-    sessParams.push(effectiveSource ? limit : limit, effectiveSource ? offset : 0);
+    sessParams.push(perSourceLimit, perSourceOffset);
     try {
       const sessRows = db.prepare(`
         SELECT s.id, s.request, s.completed, s.project, s.created_at, s.created_at_epoch,
@@ -229,7 +186,7 @@ function cmdSearch(db, args) {
     if (project) { promptWheres.push('s.project = ?'); promptParams.push(project); }
     if (dateFrom) { promptWheres.push('p.created_at_epoch >= ?'); promptParams.push(dateFrom); }
     if (dateTo) { promptWheres.push('p.created_at_epoch <= ?'); promptParams.push(dateTo); }
-    promptParams.push(effectiveSource ? limit : limit, effectiveSource ? offset : 0);
+    promptParams.push(perSourceLimit, perSourceOffset);
     try {
       const promptRows = db.prepare(`
         SELECT p.id, p.prompt_text, p.content_session_id, p.created_at, p.created_at_epoch,
@@ -256,7 +213,7 @@ function cmdSearch(db, args) {
           if (project) likeParams.push(project);
           if (dateFrom) likeParams.push(dateFrom);
           if (dateTo) likeParams.push(dateTo);
-          likeParams.push(effectiveSource ? limit : limit, effectiveSource ? offset : 0);
+          likeParams.push(perSourceLimit, perSourceOffset);
           const fallbackRows = db.prepare(`
             SELECT p.id, p.prompt_text, p.content_session_id, p.created_at, p.created_at_epoch
             FROM user_prompts p
@@ -281,13 +238,18 @@ function cmdSearch(db, args) {
   }
   if (results.length === 0) {
-    out(`[mem] No results for "${query}"`);
+    if (jsonOutput) {
+      out(JSON.stringify({ query, total: 0, returned: 0, offset, limit, results: [] }));
+    } else {
+      out(`[mem] No results for "${query}"`);
+    }
     return;
   }
-  // Cross-source score normalization (aligned with MCP mem_search)
-  const isCrossSource = !effectiveSource;
-  if (isCrossSource && results.length > 0) {
+  // Cross-source score normalization (paired-path with server.mjs:428).
+  // ftsQuery gate prevents normalization when scores are all 0 (no-FTS path).
+  const isCrossSource = isCrossSourceMode;
+  if (isCrossSource && results.length > 0 && ftsQuery) {
     for (const src of ['obs', 'session', 'prompt']) {
       const srcResults = results.filter(r => r._source === src && r.score !== null && r.score !== undefined);
       if (srcResults.length < 2) continue;
@@ -318,18 +280,63 @@ function cmdSearch(db, args) {
   // else 'relevance' keeps BM25 score order (already sorted)
   // Trim to limit with offset
+  const total = results.length;
   const paged = results.slice(offset, offset + limit);
   if (paged.length === 0) {
-    out(`[mem] No results for "${query}" at offset ${offset}`);
+    if (jsonOutput) {
+      out(JSON.stringify({ query, total, returned: 0, offset, limit, results: [] }));
+    } else {
+      out(`[mem] No results for "${query}" at offset ${offset}`);
+    }
     return;
   }
+  // paired-path with server.mjs formatSearchOutput (#8198): "N of M" total when paged < total.
   const showTime = sort === 'time';
   const hasMixed = paged.some(r => r._source === 'session' || r._source === 'prompt');
   // Suppressed when --or was explicit — user already asked for OR, no "fallback" there.
   const fallbackHint = orFallbackFired && !useOr ? ' (relaxed AND→OR)' : '';
-  out(`[mem] ${paged.length} result${paged.length !== 1 ? 's' : ''} for "${query}"${fallbackHint}:${hasMixed ? ' (# observation, S# session, P# prompt)' : ''}`);
+  if (jsonOutput) {
+    const items = paged.map(r => {
+      const base = {
+        source: r._source,
+        id: r.id,
+        created_at: r.created_at,
+        score: r.score ?? null,
+      };
+      if (r._source === 'session') {
+        return { ...base, request: r.request || null, completed: r.completed || null, project: r.project || null };
+      }
+      if (r._source === 'prompt') {
+        return { ...base, prompt_text: r.prompt_text || null };
+      }
+      return {
+        ...base,
+        type: r.type,
+        title: r.title || r.subtitle || null,
+        lesson_learned: r.lesson_learned || null,
+        importance: r.importance ?? null,
+        superseded: Boolean(r.superseded),
+        files_modified: r.files_modified || null,
+      };
+    });
+    out(JSON.stringify({
+      query,
+      total,
+      returned: paged.length,
+      offset,
+      limit,
+      relaxed_and_to_or: orFallbackFired && !useOr,
+      mixed_sources: hasMixed,
+      results: items,
+    }));
+    return;
+  }
+  const countLabel = total > paged.length ? `${paged.length} of ${total}` : `${paged.length}`;
+  out(`[mem] Found ${countLabel} result${paged.length !== 1 ? 's' : ''} for "${query}"${fallbackHint}:${hasMixed ? ' (# observation, S# session, P# prompt)' : ''}`);
   for (const r of paged) {
     const timeStr = showTime && r.created_at_epoch ? ` (${relativeTime(r.created_at_epoch)})` : '';
     if (r._source === 'session') {
@@ -350,115 +357,6 @@ function cmdSearch(db, args) {
   }
 }
-function searchFts(db, ftsQuery, { type, project, limit, dateFrom, dateTo, minImportance, branch, includeNoise, offset }) {
-  const now = Date.now();
-  // Current project for boost (2× when no explicit project filter)
-  const currentProject = !project ? inferProject() : null;
-  // WHERE clause params (positional ? in SQL order)
-  const whereParams = [ftsQuery];
-  const wheres = [
-    'observations_fts MATCH ?',
-    'COALESCE(o.compressed_into, 0) = 0',
-    'o.superseded_at IS NULL',
-  ];
-  if (project) { wheres.push('o.project = ?'); whereParams.push(project); }
-  if (type) { wheres.push('o.type = ?'); whereParams.push(type); }
-  if (dateFrom) { wheres.push('o.created_at_epoch >= ?'); whereParams.push(dateFrom); }
-  if (dateTo) { wheres.push('o.created_at_epoch <= ?'); whereParams.push(dateTo); }
-  if (minImportance) { wheres.push('COALESCE(o.importance, 1) >= ?'); whereParams.push(minImportance); }
-  if (branch) { wheres.push('o.branch = ?'); whereParams.push(branch); }
-  // R-1: exclude hook-llm fallback titles ("Modified X", "Worked on X", raw error logs)
-  // from default search. They compete for BM25 rank but have ~3% access rate. Mirrors the
-  // filter already applied in hook-memory.mjs, hook-context.mjs, and user-prompt-search.js.
-  // Use --include-noise to audit them.
-  if (!includeNoise) wheres.push(notLowSignalTitleClause('o'));
-  // Param order: SELECT scoring (now, proj, proj) → WHERE (ftsQuery, filters...) → ORDER BY scoring (now, proj, proj) → LIMIT/OFFSET
-  const scoreParams = [now, currentProject, currentProject];
-  const params = [...scoreParams, ...whereParams, ...scoreParams, limit, offset || 0];
-  // Scoring aligned with server.mjs: BM25 × type-decay × type-quality × project_boost × importance × access_bonus × lesson-boost
-  // R-3: lesson_learned presence adds a +0.3 multiplier (empirical: +6.3pp hit-rate lift on bugfix).
-  const ftsRows = db.prepare(`
-    SELECT o.id, o.type, o.title, o.subtitle, o.created_at, o.created_at_epoch, o.lesson_learned,
-           o.files_modified, o.importance,
-           ${OBS_BM25}
-             * (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
-             * ${TYPE_QUALITY_CASE}
-             * (CASE WHEN ? IS NOT NULL AND o.project = ? THEN 2.0 ELSE 1.0 END)
-             * (0.5 + 0.5 * COALESCE(o.importance, 1))
-             * (1.0 + 0.1 * LN(1 + COALESCE(o.access_count, 0)))
-             * (1.0 + 0.3 * (o.lesson_learned IS NOT NULL)) as score
-    FROM observations_fts
-    JOIN observations o ON observations_fts.rowid = o.id
-    WHERE ${wheres.join(' AND ')}
-    ORDER BY ${OBS_BM25}
-      * (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
-      * ${TYPE_QUALITY_CASE}
-      * (CASE WHEN ? IS NOT NULL AND o.project = ? THEN 2.0 ELSE 1.0 END)
-      * (0.5 + 0.5 * COALESCE(o.importance, 1))
-      * (1.0 + 0.1 * LN(1 + COALESCE(o.access_count, 0)))
-      * (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))
-    LIMIT ? OFFSET ?
-  `).all(...params);
-  // Hybrid: vector search + RRF merge (best-effort)
-  try {
-    const vocab = getVocabulary(db);
-    if (vocab) {
-      const queryText = ftsQuery.replace(/['"()]/g, ' ');
-      const queryVec = computeVector(queryText, vocab);
-      if (queryVec) {
-        const vecResults = vectorSearch(db, queryVec, {
-          project: project || null,
-          vocabVersion: vocab.version,
-          limit: VECTOR_SCAN_LIMIT,
-        });
-        if (vecResults.length > 0 && ftsRows.length > 0) {
-          const rrfRanking = rrfMerge(ftsRows, vecResults);
-          const rowMap = new Map(ftsRows.map(r => [r.id, r]));
-          for (const vr of vecResults) {
-            if (!rowMap.has(vr.id)) {
-              const obs = db.prepare('SELECT id, type, title, subtitle, created_at, created_at_epoch, lesson_learned, importance, branch, files_modified FROM observations WHERE id = ?').get(vr.id);
-              if (obs) {
-                // Apply same filters as FTS5 query (aligned with MCP searchObservations)
-                if (dateFrom && obs.created_at_epoch < dateFrom) continue;
-                if (dateTo && obs.created_at_epoch > dateTo) continue;
-                if (minImportance && (obs.importance ?? 1) < minImportance) continue;
-                if (branch && obs.branch !== branch) continue;
-                // R-1: LOW_SIGNAL filter also applies to vector-side additions (the SQL
-                // clause only filtered the FTS5 side) so RRF can't re-admit noise.
-                if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) continue;
-                rowMap.set(vr.id, obs);
-              }
-            }
-          }
-          return rrfRanking
-            .filter(rr => rowMap.has(rr.id))
-            .map(rr => rowMap.get(rr.id))
-            .slice(0, limit);
-        } else if (vecResults.length > 0 && ftsRows.length === 0) {
-          return vecResults
-            .map(vr => db.prepare('SELECT id, type, title, subtitle, created_at, created_at_epoch, lesson_learned, importance, branch FROM observations WHERE id = ?').get(vr.id))
-            .filter(obs => {
-              if (!obs) return false;
-              if (dateFrom && obs.created_at_epoch < dateFrom) return false;
-              if (dateTo && obs.created_at_epoch > dateTo) return false;
-              if (minImportance && (obs.importance ?? 1) < minImportance) return false;
-              if (branch && obs.branch !== branch) return false;
-              if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) return false;
-              return true;
-            })
-            .slice(0, limit);
-        }
-      }
-    }
-  } catch { /* vector search is best-effort */ }
-  return ftsRows;
-}
 function cmdRecent(db, args) {
   const { positional, flags } = parseArgs(args);
   const rawArg = positional[0];
@@ -1985,6 +1883,7 @@ Commands:
     --sort S            Sort: relevance (default), time, importance
     --or                Use OR instead of AND between search terms
     --include-noise     Include hook-llm fallback titles ("Modified X", raw error logs)
+    --json              Output as JSON: {query,total,returned,offset,limit,results:[…]}
   recent [N]            Show N most recent observations (default 10)
     --project P         Filter by project
@@ -2002,7 +1901,11 @@ Commands:
   timeline              Show observations around an anchor (shows recent if no anchor)
     --anchor ID         Center on this ID. Accepts N, #N, P#N, or S#N — P#/S# anchors
                         resolve to the nearest-in-time observation in the same project.
-    --query "text"      Find anchor by FTS5 search
+    --query "text"      Find anchor by FTS5 search. Ranks by BM25 × time-decay,
+                        so multi-term queries surface the BEST topical match
+                        (highest term coverage), not the most recent. For
+                        "recent activity around X", use 'recent' or
+                        'search "X" --sort time' instead.
     --before N          Show N before anchor (default 5)
     --after N           Show N after anchor (default 5)
     --project P         Filter by project

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-mem-lite",
-  "version": "2.51.0",
+  "version": "2.53.0",
   "description": "Lightweight persistent memory system for Claude Code",
   "type": "module",
   "engines": {
@@ -26,6 +26,7 @@
     "mem-cli.mjs",
     "server.mjs",
     "server-internals.mjs",
+    "search-engine.mjs",
     "hook.mjs",
     "hook-shared.mjs",
     "hook-llm.mjs",

package/search-engine.mjs ADDED Viewed

@@ -0,0 +1,249 @@
+// Shared observation-search engine — the single source of truth for
+// hybrid FTS5 + vector ranking, OR fallback, concept/PRF expansion, and
+// RRF merge. Both server.mjs (mem_search MCP tool) and mem-cli.mjs (search CLI)
+// import these helpers so identical queries return identical candidate sets
+// and rankings. See #8198 / #8212 for the prior paired-path divergence this
+// module exists to eliminate.
+import {
+  OBS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE,
+  notLowSignalTitleClause, LOW_SIGNAL_TITLE,
+  relaxFtsQueryToOr, debugLog, debugCatch,
+} from './utils.mjs';
+import { getVocabulary, computeVector, vectorSearch, rrfMerge } from './tfidf.mjs';
+import { extractPRFTerms, expandQueryByConcepts } from './server-internals.mjs';
+// Scoring expressions — full adds project boost + access bonus; simple is for
+// expansion paths where boost would over-amplify already-loose matches.
+//
+// Both are SQL fragments that buildObsFtsQuery splices into the SELECT list, so
+// their `?` placeholders bind BEFORE the WHERE-clause placeholders.
+// Placeholders visible in these fragments, in bind order:
+//   FULL_SCORE:   1) reference time for the recency term,
+//                 2) + 3) project-boost value (the same value, bound twice)
+//   SIMPLE_SCORE: 1) reference time for the recency term
+// NOTE(review): assumes OBS_BM25 / TYPE_DECAY_CASE / TYPE_QUALITY_CASE (imported
+// from utils.mjs) contribute no placeholders of their own — confirm.
+//
+// Recency term: 1 + EXP(-0.693 * age / TYPE_DECAY_CASE). -0.693 ≈ -ln 2, so the
+// bonus halves each time `age` grows by the TYPE_DECAY_CASE value. `age` is
+// measured from the later of created_at_epoch and last_accessed_at.
+// A non-NULL lesson_learned multiplies the score by 1.3; a NULL importance is
+// treated as 1.
+const FULL_SCORE = `${OBS_BM25}
+  * (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
+  * ${TYPE_QUALITY_CASE}
+  * (CASE WHEN ? IS NOT NULL AND o.project = ? THEN 2.0 ELSE 1.0 END)
+  * (0.5 + 0.5 * COALESCE(o.importance, 1))
+  * (1.0 + 0.1 * LN(1 + COALESCE(o.access_count, 0)))
+  * (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))`;
+const SIMPLE_SCORE = `${OBS_BM25}
+  * (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
+  * ${TYPE_QUALITY_CASE}
+  * (0.5 + 0.5 * COALESCE(o.importance, 1))
+  * (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))`;
+/**
+ * Build the SQL text for an FTS5 observation search.
+ *
+ * Rows with a non-zero compressed_into or a non-NULL superseded_at are always
+ * excluded. Every optional filter uses the `? IS NULL OR col = ?` shape, so
+ * its value is bound TWICE and a NULL binding disables the filter.
+ *
+ * Bind order of the returned statement: score-expression placeholders first
+ * (they sit in the SELECT list), then the MATCH query, then the doubled
+ * filter values for project, type, created_at_epoch lower bound,
+ * created_at_epoch upper bound, minimum importance and branch, then LIMIT
+ * and, only with `withOffset`, OFFSET. buildObsFtsParams emits this order.
+ *
+ * @param {'full'|'simple'} scoring - 'full' selects FULL_SCORE; any other value selects SIMPLE_SCORE.
+ * @param {object} [opts]
+ * @param {number} [opts.multiplier] - When truthy, appended to the score expression as a
+ *   literal factor. It is interpolated into the SQL text rather than bound, so pass only
+ *   trusted numeric constants.
+ * @param {boolean} [opts.withSnippet] - Also select an FTS5 snippet as `match_snippet`.
+ * @param {boolean} [opts.withOffset] - Append an OFFSET placeholder after LIMIT.
+ * @param {boolean} [opts.includeNoise] - When falsy, the notLowSignalTitleClause('o') filter is added.
+ * @returns {string} SQL text; rows are ordered by ascending `score`.
+ */
+export function buildObsFtsQuery(scoring, { multiplier, withSnippet, withOffset, includeNoise } = {}) {
+  const scoreExpr = scoring === 'full' ? FULL_SCORE : SIMPLE_SCORE;
+  const mult = multiplier ? ` * ${multiplier}` : '';
+  const lowSignalClause = includeNoise ? '' : `AND ${notLowSignalTitleClause('o')}`;
+  return `
+    SELECT o.id, o.type, o.title, o.subtitle, o.project, o.created_at, o.created_at_epoch, o.importance,
+           o.files_modified, o.lesson_learned,
+           ${withSnippet ? "snippet(observations_fts, 2, '»', '«', '…', 10) as match_snippet," : ''}
+           ${scoreExpr}${mult} as score
+    FROM observations_fts
+    JOIN observations o ON observations_fts.rowid = o.id
+    WHERE observations_fts MATCH ?
+      AND COALESCE(o.compressed_into, 0) = 0
+      AND o.superseded_at IS NULL
+      AND (? IS NULL OR o.project = ?)
+      AND (? IS NULL OR o.type = ?)
+      AND (? IS NULL OR o.created_at_epoch >= ?)
+      AND (? IS NULL OR o.created_at_epoch <= ?)
+      AND (? IS NULL OR COALESCE(o.importance, 1) >= ?)
+      AND (? IS NULL OR o.branch = ?)
+      ${lowSignalClause}
+    ORDER BY score
+    LIMIT ?${withOffset ? ' OFFSET ?' : ''}`;
+}
+/**
+ * Assemble the positional bindings for a statement built by buildObsFtsQuery.
+ *
+ * Order: `now`; `projectBoost` twice (only when the key is not undefined —
+ * pass null to bind an inactive boost, omit it for simple scoring); the FTS
+ * query; each filter value twice (project, obs_type, epochFrom, epochTo,
+ * importance, branch — missing args become null); `limit`; then `offset`
+ * when it is not undefined.
+ *
+ * @param {object} p
+ * @returns {Array} bindings in statement order
+ */
+export function buildObsFtsParams({ now, projectBoost, ftsQuery, args, epochFrom, epochTo, limit, offset }) {
+  const twice = (value) => [value, value];
+  const orNull = (value) => value ?? null;
+  return [
+    now,
+    ...(projectBoost !== undefined ? twice(projectBoost) : []),
+    ftsQuery,
+    ...twice(orNull(args.project)),
+    ...twice(orNull(args.obs_type)),
+    ...twice(epochFrom),
+    ...twice(epochTo),
+    ...twice(orNull(args.importance)),
+    ...twice(orNull(args.branch)),
+    limit,
+    ...(offset !== undefined ? [offset] : []),
+  ];
+}
+/**
+ * Convert a raw row from a buildObsFtsQuery statement into a search result.
+ *
+ * @param {object} r - Row as returned by the statement.
+ * @param {object} [opts]
+ * @param {number} [opts.scoreMultiplier] - When truthy, the row score is multiplied by it.
+ * @param {boolean} [opts.snippet] - When truthy, copy `match_snippet` (or '' if absent);
+ *   otherwise the snippet is always ''.
+ * @returns {object} result tagged with source 'obs'
+ */
+export function ftsRowToResult(r, { scoreMultiplier, snippet } = {}) {
+  const { id, type, title, subtitle, project, created_at_epoch, files_modified, importance, lesson_learned } = r;
+  let score = r.score;
+  if (scoreMultiplier) score *= scoreMultiplier;
+  const matchSnippet = snippet && r.match_snippet ? r.match_snippet : '';
+  return {
+    source: 'obs', id, type, title, subtitle,
+    project, date: r.created_at, created_at_epoch,
+    score,
+    files_modified, importance, lesson_learned,
+    snippet: matchSnippet,
+  };
+}
+// Concept co-occurrence expansion: asks expandQueryByConcepts for related
+// concepts, ORs them into a simple-scored FTS query, and appends rows whose id
+// is not yet in existingIds at 0.7× score. Mutates `existingIds` and `results`
+// in place; a failing expansion query is logged and otherwise ignored.
+function expandObsByConceptCo(db, ctx, now, existingIds, results, includeNoise = false) {
+  const { ftsQuery, args, epochFrom, epochTo, limit } = ctx;
+  const sparseThreshold = Math.ceil(limit / 2);
+  if (results.length >= sparseThreshold) return;
+  const concepts = expandQueryByConcepts(db, ftsQuery, args.project);
+  if (concepts.length === 0) return;
+  // Each concept becomes a quoted FTS5 phrase; embedded quotes are doubled.
+  const quoted = concepts.map((concept) => `"${concept.replace(/"/g, '""')}"`);
+  const conceptQuery = quoted.join(' OR ');
+  try {
+    const statement = db.prepare(buildObsFtsQuery('simple', { includeNoise }));
+    const bindings = buildObsFtsParams({ now, ftsQuery: conceptQuery, args, epochFrom, epochTo, limit });
+    for (const row of statement.all(...bindings)) {
+      if (existingIds.has(row.id)) continue;
+      existingIds.add(row.id);
+      results.push(ftsRowToResult(row, { scoreMultiplier: 0.7 }));
+    }
+  } catch (e) { debugLog('WARN', 'search-engine', `concept expansion error: ${e.message}`); }
+}
+// Pseudo-relevance feedback: re-reads the top 8 BM25 matches for the original
+// query, lets extractPRFTerms pick feedback terms from their title/narrative,
+// ORs those terms into a simple-scored FTS query, and appends rows whose id is
+// not yet in existingIds at 0.6× score. Does nothing unless the primary query
+// returned at least 3 rows. Mutates `existingIds` and `results` in place; a
+// failing expansion query is logged and otherwise ignored.
+function expandObsByPRF(db, ctx, now, primaryCount, existingIds, results, includeNoise = false) {
+  const { ftsQuery, args, epochFrom, epochTo, limit } = ctx;
+  if (primaryCount < 3) return;
+  const projectFilter = args.project ?? null;
+  const seedRows = db.prepare(`
+    SELECT o.title, o.narrative FROM observations_fts
+    JOIN observations o ON observations_fts.rowid = o.id
+    WHERE observations_fts MATCH ? AND COALESCE(o.compressed_into, 0) = 0
+      AND (? IS NULL OR o.project = ?)
+    ORDER BY ${OBS_BM25}
+    LIMIT 8
+  `).all(ftsQuery, projectFilter, projectFilter);
+  const feedbackTerms = extractPRFTerms(seedRows, ftsQuery);
+  if (feedbackTerms.length === 0) return;
+  // Each term becomes a quoted FTS5 phrase; embedded quotes are doubled.
+  const quoted = feedbackTerms.map((term) => `"${term.replace(/"/g, '""')}"`);
+  const feedbackQuery = quoted.join(' OR ');
+  try {
+    const statement = db.prepare(buildObsFtsQuery('simple', { includeNoise }));
+    const bindings = buildObsFtsParams({ now, ftsQuery: feedbackQuery, args, epochFrom, epochTo, limit });
+    for (const row of statement.all(...bindings)) {
+      if (existingIds.has(row.id)) continue;
+      existingIds.add(row.id);
+      results.push(ftsRowToResult(row, { scoreMultiplier: 0.6 }));
+    }
+  } catch (e) { debugLog('WARN', 'search-engine', `PRF expansion error: ${e.message}`); }
+}
+/**
+ * Hybrid observation search — single source of truth for FTS + vector + RRF.
+ *
+ * Pipeline (shared by the server.mjs wrapper and mem-cli.mjs via this module):
+ *   0. No query text → filtered listing ordered newest-first; no scoring and
+ *      no vector step
+ *   1. FTS5 BM25 query (full scoring)
+ *   2. OR fallback when the AND query returned 0 rows → sets
+ *      ctx.orFallbackFired = true when the relaxed query found rows
+ *   3. Concept co-occurrence expansion (only when the primary query matched
+ *      and results are below half of `limit`)
+ *   4. PRF (pseudo-relevance feedback) expansion (same gate as step 3)
+ *   5. Vector search + RRF merge (re-ranks all results when both modes have hits)
+ *   6. Vector-only fallback (when FTS5 found nothing)
+ *
+ * Vector search is best-effort: any error in steps 5–6 is reported through
+ * debugCatch and the FTS-derived results are returned.
+ *
+ * @param {Database} db - better-sqlite3 instance
+ * @param {object} ctx - { ftsQuery, args, epochFrom, epochTo, perSourceLimit,
+ *                         perSourceOffset, currentProject, limit, orFallbackFired }
+ *   `args` supplies the filters (project, obs_type, importance, branch) and
+ *   include_noise; low-signal titles are hidden unless include_noise === true.
+ * @returns {Array} list of result objects (ctx.orFallbackFired may be set as a side effect)
+ */
+export function searchObservationsHybrid(db, ctx) {
+  const { ftsQuery, args, epochFrom, epochTo, perSourceLimit, perSourceOffset, currentProject, limit } = ctx;
+  const results = [];
+  const includeNoise = args.include_noise === true;
+  if (!ftsQuery) {
+    const params = [];
+    const wheres = ['COALESCE(compressed_into, 0) = 0', 'superseded_at IS NULL'];
+    if (args.project) { wheres.push('project = ?'); params.push(args.project); }
+    if (args.obs_type) { wheres.push('type = ?'); params.push(args.obs_type); }
+    if (epochFrom !== null) { wheres.push('created_at_epoch >= ?'); params.push(epochFrom); }
+    if (epochTo !== null) { wheres.push('created_at_epoch <= ?'); params.push(epochTo); }
+    if (args.importance) { wheres.push('COALESCE(importance, 1) >= ?'); params.push(args.importance); }
+    if (args.branch) { wheres.push('branch = ?'); params.push(args.branch); }
+    const where = `WHERE ${wheres.join(' AND ')}`;
+    params.push(perSourceLimit, perSourceOffset);
+    const rows = db.prepare(`
+      SELECT id, type, title, subtitle, project, created_at, created_at_epoch, files_modified, importance, lesson_learned
+      FROM observations ${where}
+      ORDER BY created_at_epoch DESC
+      LIMIT ? OFFSET ?
+    `).all(...params);
+    for (const r of rows) {
+      results.push({ source: 'obs', id: r.id, type: r.type, title: r.title, subtitle: r.subtitle, project: r.project, date: r.created_at, created_at_epoch: r.created_at_epoch, files_modified: r.files_modified, importance: r.importance, lesson_learned: r.lesson_learned });
+    }
+    return results;
+  }
+  const now = Date.now();
+  // An explicit project filter already restricts rows, so the boost is bound
+  // as null (inactive) in that case; otherwise the current project is boosted.
+  const projectBoost = args.project ? null : currentProject;
+  const rows = db.prepare(buildObsFtsQuery('full', { withSnippet: true, withOffset: true, includeNoise }))
+    .all(...buildObsFtsParams({ now, projectBoost, ftsQuery, args, epochFrom, epochTo, limit: perSourceLimit, offset: perSourceOffset }));
+  for (const r of rows) results.push(ftsRowToResult(r, { snippet: true }));
+  // OR fallback — must run BEFORE vector merge so orFallbackFired reflects FTS-only state.
+  if (rows.length === 0) {
+    const orQuery = relaxFtsQueryToOr(ftsQuery);
+    if (orQuery) {
+      try {
+        const orRows = db.prepare(buildObsFtsQuery('full', { multiplier: 0.5, withSnippet: true, withOffset: true, includeNoise }))
+          .all(...buildObsFtsParams({ now, projectBoost, ftsQuery: orQuery, args, epochFrom, epochTo, limit: perSourceLimit, offset: perSourceOffset }));
+        if (orRows.length > 0) ctx.orFallbackFired = true;
+        for (const r of orRows) results.push(ftsRowToResult(r, { snippet: true }));
+      } catch (e) { debugCatch(e, 'searchObservationsHybrid-or-fallback'); }
+    }
+  }
+  // Two-phase query expansion (only when well below limit)
+  if (rows.length > 0 && results.length < Math.ceil(limit / 2)) {
+    const existingIds = new Set(results.map(r => r.id));
+    expandObsByConceptCo(db, ctx, now, existingIds, results, includeNoise);
+    expandObsByPRF(db, ctx, now, rows.length, existingIds, results, includeNoise);
+  }
+  // Vector search + RRF hybrid merge
+  try {
+    const vocab = getVocabulary(db);
+    if (!vocab) return results;
+    const queryText = ftsQuery.replace(/['"()]/g, ' ');
+    const queryVec = computeVector(queryText, vocab);
+    if (!queryVec) return results;
+    const vecResults = vectorSearch(db, queryVec, {
+      project: args.project ?? null,
+      type: args.obs_type ?? null,
+      vocabVersion: vocab.version,
+    });
+    if (vecResults.length === 0) return results;
+    // Both vector paths hydrate through ONE statement and ONE mapper so they
+    // cannot drift apart: every column the result exposes (lesson_learned and
+    // created_at_epoch included) is selected, and the statement is prepared
+    // once instead of once per vector hit.
+    const obsById = db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, importance, files_modified, branch, lesson_learned FROM observations WHERE id = ?');
+    // Returns a result object for a vector hit, or null when the row is gone
+    // or fails a filter the FTS5 WHERE clause would have applied (date range,
+    // importance, branch, low-signal title).
+    const hydrateVectorHit = (vr) => {
+      const obs = obsById.get(vr.id);
+      if (!obs) return null;
+      if (epochFrom !== null && obs.created_at_epoch < epochFrom) return null;
+      if (epochTo !== null && obs.created_at_epoch > epochTo) return null;
+      if (args.importance && (obs.importance ?? 1) < args.importance) return null;
+      if (args.branch && obs.branch !== args.branch) return null;
+      if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) return null;
+      return { source: 'obs', id: obs.id, type: obs.type, title: obs.title, subtitle: obs.subtitle, project: obs.project, date: obs.created_at, created_at_epoch: obs.created_at_epoch, importance: obs.importance, files_modified: obs.files_modified, lesson_learned: obs.lesson_learned, snippet: '' };
+    };
+    if (results.length > 0) {
+      const rrfRanking = rrfMerge(results, vecResults);
+      const resultMap = new Map(results.map(r => [r.id, r]));
+      // Add vector-only hits (found by similarity but not by FTS5).
+      for (const vr of vecResults) {
+        if (resultMap.has(vr.id)) continue;
+        const hit = hydrateVectorHit(vr);
+        if (hit) resultMap.set(vr.id, hit);
+      }
+      // Re-order by RRF score; negated so that lower still means better,
+      // matching the ascending-score order of the FTS results.
+      const reordered = rrfRanking
+        .filter(rr => resultMap.has(rr.id))
+        .map(rr => ({ ...resultMap.get(rr.id), score: -rr.rrfScore }));
+      results.length = 0;
+      results.push(...reordered);
+    } else {
+      // FTS5 found nothing but vector found results
+      for (const vr of vecResults) {
+        const hit = hydrateVectorHit(vr);
+        if (hit) results.push({ ...hit, score: -vr.similarity });
+      }
+    }
+  } catch (e) { debugCatch(e, 'searchObservationsHybrid-vector'); }
+  return results;
+}

package/server.mjs CHANGED Viewed

@@ -5,11 +5,12 @@
 import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
 import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
 import { ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
-import { jaccardSimilarity, truncate, typeIcon, sanitizeFtsQuery, relaxFtsQueryToOr, inferProject, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, fmtDate, isoWeekKey, debugLog, debugCatch, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, getCurrentBranch, DEFAULT_DECAY_HALF_LIFE_MS, isPathConfined, notLowSignalTitleClause, LOW_SIGNAL_TITLE } from './utils.mjs';
+import { jaccardSimilarity, truncate, typeIcon, sanitizeFtsQuery, relaxFtsQueryToOr, inferProject, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, fmtDate, isoWeekKey, debugLog, debugCatch, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, getCurrentBranch, DEFAULT_DECAY_HALF_LIFE_MS, isPathConfined, notLowSignalTitleClause } from './utils.mjs';
 import { extractCjkLikePatterns, cjkPrecisionOk } from './nlp.mjs';
 import { resolveProject as _resolveProjectShared } from './project-utils.mjs';
 import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
-import { reRankWithContext, markSuperseded, extractPRFTerms, expandQueryByConcepts, autoBoostIfNeeded, runIdleCleanup, buildServerInstructions } from './server-internals.mjs';
+import { reRankWithContext, markSuperseded, autoBoostIfNeeded, runIdleCleanup, buildServerInstructions } from './server-internals.mjs';
+import { searchObservationsHybrid } from './search-engine.mjs';
 import { effectiveQuiet } from './hook-shared.mjs';
 import { computeTier, TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
 import { memSearchSchema, memRecentSchema, memTimelineSchema, memGetSchema, memDeleteSchema, memSaveSchema, memStatsSchema, memCompressSchema, memMaintainSchema, memOptimizeSchema, memUpdateSchema, memExportSchema, memRecallSchema, memFtsCheckSchema, memRegistrySchema, memBrowseSchema, memUseSchema, tools as TOOL_DEFS } from './tool-schemas.mjs';
@@ -28,7 +29,7 @@ import { homedir } from 'os';
 import { ensureRegistryDb, upsertResource } from './registry.mjs';
 import { searchResources } from './registry-retriever.mjs';
 import { probeOtherSources as probeIdSources, parseIdToken, bucketIdTokens } from './lib/id-routing.mjs';
-import { getVocabulary, rebuildVocabulary, _resetVocabCache, computeVector, vectorSearch, rrfMerge } from './tfidf.mjs';
+import { getVocabulary, rebuildVocabulary, _resetVocabCache, computeVector } from './tfidf.mjs';
 import { createRequire } from 'module';
 const require = createRequire(import.meta.url);
@@ -145,241 +146,13 @@ function safeHandler(fn) {
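 // TYPE_DECAY_CASE / TYPE_QUALITY_CASE are no longer imported here; search-engine.mjs imports them from utils.mjs.
 // Score expression variants for observation FTS5 queries (FULL_SCORE / SIMPLE_SCORE) now live in search-engine.mjs (see Scoring Model Constants above)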
-// TYPE_QUALITY_CASE demotes bugfix (×0.6) and promotes decision/discovery (×1.5/1.3)
-// R-3: lesson_learned presence adds ×1.3 boost — empirical +6.3pp hit-rate lift on bugfix.
-const FULL_SCORE = `${OBS_BM25}
-  * (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
-  * ${TYPE_QUALITY_CASE}
-  * (CASE WHEN ? IS NOT NULL AND o.project = ? THEN 2.0 ELSE 1.0 END)
-  * (0.5 + 0.5 * COALESCE(o.importance, 1))
-  * (1.0 + 0.1 * LN(1 + COALESCE(o.access_count, 0)))
-  * (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))`;
-const SIMPLE_SCORE = `${OBS_BM25}
-  * (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
-  * ${TYPE_QUALITY_CASE}
-  * (0.5 + 0.5 * COALESCE(o.importance, 1))
-  * (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))`;
-/**
- * Build an FTS5 observation search query.
- * @param {'full'|'simple'} scoring - full includes project boost + access bonus
- * @param {object} opts - { multiplier, withSnippet, withOffset, includeNoise }
- *   includeNoise=true keeps hook-llm fallback titles ("Modified X", "Worked on X", etc.);
- *   default false mirrors the filter already applied in hook-memory.mjs / user-prompt-search.js.
- */
-function buildObsFtsQuery(scoring, { multiplier, withSnippet, withOffset, includeNoise } = {}) {
-  const scoreExpr = scoring === 'full' ? FULL_SCORE : SIMPLE_SCORE;
-  const mult = multiplier ? ` * ${multiplier}` : '';
-  const lowSignalClause = includeNoise ? '' : `AND ${notLowSignalTitleClause('o')}`;
-  return `
-    SELECT o.id, o.type, o.title, o.subtitle, o.project, o.created_at, o.created_at_epoch, o.importance,
-           o.files_modified,
-           ${withSnippet ? "snippet(observations_fts, 2, '»', '«', '…', 10) as match_snippet," : ''}
-           ${scoreExpr}${mult} as score
-    FROM observations_fts
-    JOIN observations o ON observations_fts.rowid = o.id
-    WHERE observations_fts MATCH ?
-      AND COALESCE(o.compressed_into, 0) = 0
-      AND o.superseded_at IS NULL
-      AND (? IS NULL OR o.project = ?)
-      AND (? IS NULL OR o.type = ?)
-      AND (? IS NULL OR o.created_at_epoch >= ?)
-      AND (? IS NULL OR o.created_at_epoch <= ?)
-      AND (? IS NULL OR COALESCE(o.importance, 1) >= ?)
-      AND (? IS NULL OR o.branch = ?)
-      ${lowSignalClause}
-    ORDER BY score
-    LIMIT ?${withOffset ? ' OFFSET ?' : ''}`;
-}
-/** Build params array for an FTS5 observation query. */
-function buildObsFtsParams({ now, projectBoost, ftsQuery, args, epochFrom, epochTo, limit, offset }) {
-  const params = [now];
-  if (projectBoost !== undefined) params.push(projectBoost, projectBoost); // full scoring only
-  params.push(
-    ftsQuery,
-    args.project ?? null, args.project ?? null,
-    args.obs_type ?? null, args.obs_type ?? null,
-    epochFrom, epochFrom,
-    epochTo, epochTo,
-    args.importance ?? null, args.importance ?? null,
-    args.branch ?? null, args.branch ?? null,
-    limit,
-  );
-  if (offset !== undefined) params.push(offset);
-  return params;
-}
-/** Map a raw FTS5 row to a result object. */
-function ftsRowToResult(r, { scoreMultiplier, snippet } = {}) {
-  return {
-    source: 'obs', id: r.id, type: r.type, title: r.title, subtitle: r.subtitle,
-    project: r.project, date: r.created_at, created_at_epoch: r.created_at_epoch,
-    score: scoreMultiplier ? r.score * scoreMultiplier : r.score,
-    files_modified: r.files_modified, importance: r.importance, snippet: snippet ? (r.match_snippet || '') : '',
-  };
-}
+// Observation-search core (FTS query/params builders, hybrid pipeline) lives in
+// search-engine.mjs so mem-cli.mjs gets the identical implementation.
+// Thin wrapper around the shared engine — keeps the existing call sites
+// (searchObservations(ctx)) without ferrying `db` through every layer.
 function searchObservations(ctx) {
-  const { ftsQuery, args, epochFrom, epochTo, perSourceLimit, perSourceOffset, currentProject, limit } = ctx;
-  const results = [];
-  // R-1: hide hook-llm fallback titles unless caller explicitly opts in via include_noise=true.
-  const includeNoise = args.include_noise === true;
-  if (ftsQuery) {
-    const now = Date.now();
-    const projectBoost = args.project ? null : currentProject;
-    const rows = db.prepare(buildObsFtsQuery('full', { withSnippet: true, withOffset: true, includeNoise }))
-      .all(...buildObsFtsParams({ now, projectBoost, ftsQuery, args, epochFrom, epochTo, limit: perSourceLimit, offset: perSourceOffset }));
-    for (const r of rows) results.push(ftsRowToResult(r, { snippet: true }));
-    // OR fallback: when AND query returns 0 results, retry with OR semantics.
-    // Sets ctx.orFallbackFired so the top-level formatter can surface a "relaxed
-    // AND→OR" hint — without it, callers can't distinguish a strict multi-term
-    // match from a partial single-term recovery.
-    if (rows.length === 0) {
-      const orQuery = relaxFtsQueryToOr(ftsQuery);
-      if (orQuery) {
-        try {
-          const orRows = db.prepare(buildObsFtsQuery('full', { multiplier: 0.5, withSnippet: true, withOffset: true, includeNoise }))
-            .all(...buildObsFtsParams({ now, projectBoost, ftsQuery: orQuery, args, epochFrom, epochTo, limit: perSourceLimit, offset: perSourceOffset }));
-          if (orRows.length > 0) ctx.orFallbackFired = true;
-          for (const r of orRows) results.push(ftsRowToResult(r, { snippet: true }));
-        } catch (e) { debugCatch(e, 'searchObservations-or-fallback'); }
-      }
-    }
-    // Two-phase query expansion for sparse results (only when well below limit)
-    if (rows.length > 0 && results.length < Math.ceil(limit / 2)) {
-      const existingIds = new Set(results.map(r => r.id));
-      expandObsByConceptCo(ctx, now, existingIds, results, includeNoise);
-      expandObsByPRF(ctx, now, rows.length, existingIds, results, includeNoise);
-    }
-    // Vector search + RRF hybrid merge
-    try {
-      const vocab = getVocabulary(db);
-      if (vocab) {
-        const queryText = ftsQuery.replace(/['"()]/g, ' ');
-        const queryVec = computeVector(queryText, vocab);
-        if (queryVec) {
-          const vecResults = vectorSearch(db, queryVec, {
-            project: args.project ?? null,
-            type: args.obs_type ?? null,
-            vocabVersion: vocab.version,
-          });
-          if (vecResults.length > 0 && results.length > 0) {
-            // RRF merge: combine BM25 ranked results with vector ranked results
-            const rrfRanking = rrfMerge(results, vecResults);
-            const resultMap = new Map(results.map(r => [r.id, r]));
-            // Add vector-only results (found by similarity but not by FTS5)
-            for (const vr of vecResults) {
-              if (!resultMap.has(vr.id)) {
-                const obs = db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, importance, files_modified, branch FROM observations WHERE id = ?').get(vr.id);
-                if (obs) {
-                  // Apply same filter constraints as FTS5
-                  if (epochFrom !== null && obs.created_at_epoch < epochFrom) continue;
-                  if (epochTo !== null && obs.created_at_epoch > epochTo) continue;
-                  if (args.importance && (obs.importance ?? 1) < args.importance) continue;
-                  if (args.branch && obs.branch !== args.branch) continue;
-                  // R-1: parity with FTS5 WHERE — vector path must also reject LOW_SIGNAL titles
-                  // so RRF cannot re-admit what the SQL clause excluded.
-                  if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) continue;
-                  resultMap.set(vr.id, { source: 'obs', id: obs.id, type: obs.type, title: obs.title, subtitle: obs.subtitle, project: obs.project, date: obs.created_at, importance: obs.importance, files_modified: obs.files_modified, snippet: '' });
-                }
-              }
-            }
-            // Re-order by RRF score
-            const reordered = rrfRanking
-              .filter(rr => resultMap.has(rr.id))
-              .map(rr => ({ ...resultMap.get(rr.id), score: -rr.rrfScore })); // negative for BM25-compatible sort
-            results.length = 0;
-            results.push(...reordered);
-          } else if (vecResults.length > 0 && results.length === 0) {
-            // FTS5 found nothing but vector found results
-            for (const vr of vecResults) {
-              const obs = db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, importance, files_modified, branch FROM observations WHERE id = ?').get(vr.id);
-              if (!obs) continue;
-              if (epochFrom !== null && obs.created_at_epoch < epochFrom) continue;
-              if (epochTo !== null && obs.created_at_epoch > epochTo) continue;
-              if (args.importance && (obs.importance ?? 1) < args.importance) continue;
-              if (args.branch && obs.branch !== args.branch) continue;
-              if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) continue;
-              results.push({ source: 'obs', id: obs.id, type: obs.type, title: obs.title, subtitle: obs.subtitle, project: obs.project, date: obs.created_at, importance: obs.importance, files_modified: obs.files_modified, score: -vr.similarity, snippet: '' });
-            }
-          }
-        }
-      }
-    } catch (e) { debugCatch(e, 'searchObservations-vector'); }
-  } else {
-    const params = [];
-    const wheres = ['COALESCE(compressed_into, 0) = 0', 'superseded_at IS NULL'];
-    if (args.project) { wheres.push('project = ?'); params.push(args.project); }
-    if (args.obs_type) { wheres.push('type = ?'); params.push(args.obs_type); }
-    if (epochFrom !== null) { wheres.push('created_at_epoch >= ?'); params.push(epochFrom); }
-    if (epochTo !== null) { wheres.push('created_at_epoch <= ?'); params.push(epochTo); }
-    if (args.importance) { wheres.push('COALESCE(importance, 1) >= ?'); params.push(args.importance); }
-    if (args.branch) { wheres.push('branch = ?'); params.push(args.branch); }
-    const where = `WHERE ${wheres.join(' AND ')}`;
-    params.push(perSourceLimit, perSourceOffset);
-    const rows = db.prepare(`
-      SELECT id, type, title, subtitle, project, created_at, created_at_epoch, files_modified, importance
-      FROM observations ${where}
-      ORDER BY created_at_epoch DESC
-      LIMIT ? OFFSET ?
-    `).all(...params);
-    for (const r of rows) {
-      results.push({ source: 'obs', id: r.id, type: r.type, title: r.title, subtitle: r.subtitle, project: r.project, date: r.created_at, created_at_epoch: r.created_at_epoch, files_modified: r.files_modified, importance: r.importance });
-    }
-  }
-  return results;
-}
-function expandObsByConceptCo(ctx, now, existingIds, results, includeNoise = false) {
-  const { ftsQuery, args, epochFrom, epochTo, limit } = ctx;
-  if (results.length >= Math.ceil(limit / 2)) return;
-  const expanded = expandQueryByConcepts(db, ftsQuery, args.project);
-  if (expanded.length === 0) return;
-  const expansionFts = expanded.map(c => `"${c.replace(/"/g, '""')}"`).join(' OR ');
-  try {
-    const expRows = db.prepare(buildObsFtsQuery('simple', { includeNoise }))
-      .all(...buildObsFtsParams({ now, ftsQuery: expansionFts, args, epochFrom, epochTo, limit }));
-    for (const r of expRows) {
-      if (!existingIds.has(r.id)) {
-        existingIds.add(r.id);
-        results.push(ftsRowToResult(r, { scoreMultiplier: 0.7 }));
-      }
-    }
-  } catch (e) { debugLog('WARN', 'mem_search', `concept expansion error: ${e.message}`); }
-}
-function expandObsByPRF(ctx, now, primaryCount, existingIds, results, includeNoise = false) {
-  const { ftsQuery, args, epochFrom, epochTo, limit } = ctx;
-  if (primaryCount < 3) return;
-  const topResults = db.prepare(`
-    SELECT o.title, o.narrative FROM observations_fts
-    JOIN observations o ON observations_fts.rowid = o.id
-    WHERE observations_fts MATCH ? AND COALESCE(o.compressed_into, 0) = 0
-      AND (? IS NULL OR o.project = ?)
-    ORDER BY ${OBS_BM25}
-    LIMIT 8
-  `).all(ftsQuery, args.project ?? null, args.project ?? null);
-  const prfTerms = extractPRFTerms(topResults, ftsQuery);
-  if (prfTerms.length === 0) return;
-  const prfFts = prfTerms.map(t => `"${t.replace(/"/g, '""')}"`).join(' OR ');
-  try {
-    const prfRows = db.prepare(buildObsFtsQuery('simple', { includeNoise }))
-      .all(...buildObsFtsParams({ now, ftsQuery: prfFts, args, epochFrom, epochTo, limit }));
-    for (const r of prfRows) {
-      if (!existingIds.has(r.id)) {
-        existingIds.add(r.id);
-        results.push(ftsRowToResult(r, { scoreMultiplier: 0.6 }));
-      }
-    }
-  } catch (e) { debugLog('WARN', 'mem_search', `PRF expansion error: ${e.message}`); }
+  return searchObservationsHybrid(db, ctx);
 }
 function searchSessions(ctx) {

package/source-files.mjs CHANGED Viewed

@@ -6,7 +6,7 @@
 export const SOURCE_FILES = [
   // Entry points and top-level modules
-  'cli.mjs', 'server.mjs', 'server-internals.mjs', 'tool-schemas.mjs',
+  'cli.mjs', 'server.mjs', 'server-internals.mjs', 'search-engine.mjs', 'tool-schemas.mjs',
   'hook.mjs', 'hook-shared.mjs', 'hook-llm.mjs', 'hook-memory.mjs', 'skip-tools.mjs',
   'hook-semaphore.mjs', 'hook-episode.mjs', 'hook-context.mjs', 'hook-handoff.mjs',
   'hook-update.mjs', 'hook-optimize.mjs',

package/tool-schemas.mjs CHANGED Viewed

@@ -318,11 +318,13 @@ export const tools = [
     name: 'mem_timeline',
     description:
       'Show observations before and after an anchor point (by ID or by FTS query).\n' +
+      'Query-anchor ranks by BM25 × time-decay → BEST topical match, not most recent.\n' +
       '\n' +
       'DO NOT use when:\n' +
       '  - You only want one record (use mem_get)\n' +
       '  - You have no anchor in mind and are just browsing (use mem_recent or mem_browse)\n' +
       '  - The sequence is obvious from commit history (use git log)\n' +
+      '  - You want "recent activity around X" (use mem_recent or mem_search sort="time")\n' +
       '\n' +
       'USE when:\n' +
       '  - Reconstructing what led up to / followed a specific bug or decision\n' +