claude-mem-lite 2.51.0 → 2.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@
10
10
  "plugins": [
11
11
  {
12
12
  "name": "claude-mem-lite",
13
- "version": "2.51.0",
13
+ "version": "2.53.0",
14
14
  "source": "./",
15
15
  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall"
16
16
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.51.0",
3
+ "version": "2.53.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall",
5
5
  "author": {
6
6
  "name": "sdsrss"
package/cli/doctor.mjs CHANGED
@@ -12,7 +12,32 @@ export async function cmdDoctor(db, args) {
12
12
  if (args.includes('--benchmark')) {
13
13
  const { runBenchmark } = await import('../lib/doctor-benchmark.mjs');
14
14
  const project = inferProject();
15
- const result = runBenchmark(db, { project });
15
+ // Sample recent user prompts so the CLI report has non-null injection_rate
16
+ // and hook latency. Without this, runBenchmark's prompts default of [] makes
17
+ // every metric 0/null — a dead command from the user's perspective. Tests
18
+ // bypass this CLI layer and call runBenchmark() directly, so the lib API
19
+ // contract (default prompts=[]) is unchanged.
20
+ let prompts = [];
21
+ try {
22
+ const limitIdx = args.indexOf('--prompts-limit');
23
+ let limit = 50;
24
+ if (limitIdx >= 0 && args[limitIdx + 1]) {
25
+ const parsed = parseInt(args[limitIdx + 1], 10);
26
+ if (Number.isFinite(parsed) && parsed > 0 && parsed <= 1000) limit = parsed;
27
+ }
28
+ const rows = db.prepare(`
29
+ SELECT p.prompt_text
30
+ FROM user_prompts p
31
+ JOIN sdk_sessions s ON p.content_session_id = s.content_session_id
32
+ WHERE s.project = ?
33
+ AND p.prompt_text IS NOT NULL
34
+ AND length(p.prompt_text) >= 15
35
+ ORDER BY p.created_at_epoch DESC
36
+ LIMIT ?
37
+ `).all(project, limit);
38
+ prompts = rows.map(r => r.prompt_text).filter(Boolean);
39
+ } catch { /* missing/empty tables on a fresh DB → leave prompts=[] */ }
40
+ const result = runBenchmark(db, { project, prompts });
16
41
  out(JSON.stringify(result, null, 2));
17
42
  return;
18
43
  }
package/hook-context.mjs CHANGED
@@ -369,19 +369,24 @@ export function buildSessionContextLines(db, project, now = new Date(), currentC
369
369
  // 5. Working state from latest /clear handoff.
370
370
  // Session scoping: when currentCcSessionId is provided, restrict to this session's
371
371
  // own clear handoff so parallel sessions don't see each other's Working State block.
372
+ // TTL: drop handoffs older than 48h. Without it, `cmdContext` (no session id) would
373
+ // surface a /clear from days ago as "current Working State" — confusing when the user
374
+ // has long moved on. 48h covers overnight breaks but excludes truly stale state.
375
+ const HANDOFF_TTL_MS = 48 * 60 * 60 * 1000;
376
+ const handoffMinEpoch = Date.now() - HANDOFF_TTL_MS;
372
377
  const prevClearHandoff = currentCcSessionId
373
378
  ? db.prepare(`
374
379
  SELECT working_on, unfinished, key_files
375
380
  FROM session_handoffs
376
- WHERE project = ? AND type = 'clear' AND session_id = ?
381
+ WHERE project = ? AND type = 'clear' AND session_id = ? AND created_at_epoch > ?
377
382
  ORDER BY created_at_epoch DESC LIMIT 1
378
- `).get(project, currentCcSessionId)
383
+ `).get(project, currentCcSessionId, handoffMinEpoch)
379
384
  : db.prepare(`
380
385
  SELECT working_on, unfinished, key_files
381
386
  FROM session_handoffs
382
- WHERE project = ? AND type = 'clear'
387
+ WHERE project = ? AND type = 'clear' AND created_at_epoch > ?
383
388
  ORDER BY created_at_epoch DESC LIMIT 1
384
- `).get(project);
389
+ `).get(project, handoffMinEpoch);
385
390
 
386
391
  const handoffLines = [];
387
392
  if (prevClearHandoff) {
package/hook.mjs CHANGED
@@ -27,6 +27,7 @@ import {
27
27
  extractErrorKeywords, extractFilePaths, isRelatedToEpisode,
28
28
  makeEntryDesc, scrubSecrets, EDIT_TOOLS, debugCatch, debugLog,
29
29
  COMPRESSED_AUTO, COMPRESSED_PENDING_PURGE, isoWeekKey, OBS_BM25,
30
+ computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity,
30
31
  } from './utils.mjs';
31
32
  import {
32
33
  readEpisodeRaw, episodeFile,
@@ -747,7 +748,8 @@ async function handleSessionStart() {
747
748
  `).run();
748
749
  if (boosted.changes > 0) debugLog('DEBUG', 'auto-maintain', `boosted ${boosted.changes} frequently-accessed observations`);
749
750
 
750
- // Auto-dedup: merge near-identical observations (same title, same project, within 1h)
751
+ // Auto-dedup (exact): merge identical-title observations within 1h.
752
+ // Catches rapid duplicate writes (same hook firing twice, race conditions).
751
753
  const dupPairs = db.prepare(`
752
754
  SELECT a.id as keep_id, b.id as remove_id
753
755
  FROM observations a
@@ -765,6 +767,55 @@ async function handleSessionStart() {
765
767
  debugLog('DEBUG', 'auto-maintain', `auto-deduped ${dupPairs.length} near-identical observations`);
766
768
  }
767
769
 
770
+ // Auto-dedup (fuzzy): catches near-identical titles that exact-match
771
+ // misses across larger time windows — e.g. episode-batch titles like
772
+ // "Modified A.mjs, B.mjs" vs "Modified B.mjs, A.mjs" written days apart.
773
+ // MinHash pre-filter (≥0.7) skips the exact Jaccard check for most pairs in the O(N²) scan; Jaccard ≥0.95 stays
774
+ // well clear of legit "two updates same area" pairs (those typically
775
+ // score 0.7–0.85, surfaced via `maintain scan` for manual review).
776
+ // Bounded by SCAN_LIMIT (500) recent rows × FUZZY_MAX_MERGES (20) merge cap.
777
+ if (!process.env.CLAUDE_MEM_SKIP_AUTO_DEDUP_FUZZY) {
778
+ const SCAN_LIMIT = 500;
779
+ const FUZZY_MAX_MERGES = 20;
780
+ const FUZZY_THRESHOLD = 0.95;
781
+ const MINHASH_PREFILTER = 0.7;
782
+ const recent = db.prepare(`
783
+ SELECT id, title, importance, created_at_epoch
784
+ FROM observations
785
+ WHERE COALESCE(compressed_into, 0) = 0
786
+ AND superseded_at IS NULL
787
+ AND created_at_epoch > ?
788
+ AND title IS NOT NULL AND title != ''
789
+ ORDER BY created_at_epoch DESC LIMIT ${SCAN_LIMIT}
790
+ `).all(STALE_AGE);
791
+ if (recent.length >= 2) {
792
+ const titles = recent.map(r => r.title.trim());
793
+ const minhashes = titles.map(t => t ? computeMinHash(t) : null);
794
+ const fuzzyRemoveIds = [];
795
+ const removed = new Set();
796
+ outer: for (let i = 0; i < recent.length; i++) {
797
+ if (!minhashes[i] || removed.has(recent[i].id)) continue;
798
+ for (let j = i + 1; j < recent.length; j++) {
799
+ if (!minhashes[j] || removed.has(recent[j].id)) continue;
800
+ if (estimateJaccardFromMinHash(minhashes[i], minhashes[j]) < MINHASH_PREFILTER) continue;
801
+ if (jaccardSimilarity(titles[i], titles[j]) < FUZZY_THRESHOLD) continue;
802
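+ // Keep the higher-importance row. NOTE(review): rows are ordered newest-first, so on an importance tie the >= below keeps recent[i] (the newer row), not the older one — confirm which is intended.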
803
+ const keep = (recent[i].importance ?? 1) >= (recent[j].importance ?? 1) ? recent[i] : recent[j];
804
+ const remove = keep === recent[i] ? recent[j] : recent[i];
805
+ fuzzyRemoveIds.push(remove.id);
806
+ removed.add(remove.id);
807
+ if (fuzzyRemoveIds.length >= FUZZY_MAX_MERGES) break outer;
808
+ }
809
+ }
810
+ if (fuzzyRemoveIds.length > 0) {
811
+ const ph = fuzzyRemoveIds.map(() => '?').join(',');
812
+ db.prepare(`UPDATE observations SET superseded_at = ?, superseded_by = 'auto-dedup-fuzzy' WHERE id IN (${ph})`)
813
+ .run(Date.now(), ...fuzzyRemoveIds);
814
+ debugLog('DEBUG', 'auto-maintain', `fuzzy auto-deduped ${fuzzyRemoveIds.length} near-identical observations`);
815
+ }
816
+ }
817
+ }
818
+
768
819
  // Mark maintenance as done (24h gate) — even though compression runs in background
769
820
  writeFileSync(maintainFile, JSON.stringify({ epoch: Date.now() }));
770
821
  // Weekly summary grouping runs in background to avoid blocking SessionStart
@@ -5,6 +5,7 @@
5
5
 
6
6
  import { notLowSignalTitleClause } from '../scoring-sql.mjs';
7
7
  import { truncate } from '../format-utils.mjs';
8
+ import { COMPRESSED_PENDING_PURGE } from '../utils.mjs';
8
9
 
9
10
  export function computeQualityStats(db, { project, days }) {
10
11
  const projectFilter = project ? 'AND project = ?' : '';
@@ -69,11 +70,22 @@ export function computeQualityStats(db, { project, days }) {
69
70
  LIMIT 5
70
71
  `).all(...baseParams);
71
72
 
72
- return { windowRow, allTimeRow, typeRows, topLessons, project, days };
73
+ // Pending-purge backlog: compressed records waiting on the time-based purge gate.
74
+ // High ratio signals push/pull imbalance — auto-mark fires daily but purge needs
75
+ // age > 37d, so a sudden write surge inflates this until the cohort ages out.
76
+ const purgeRow = db.prepare(`
77
+ SELECT
78
+ SUM(CASE WHEN compressed_into IS NOT NULL AND compressed_into != 0 THEN 1 ELSE 0 END) as compressed,
79
+ SUM(CASE WHEN compressed_into = ${COMPRESSED_PENDING_PURGE} THEN 1 ELSE 0 END) as pending_purge
80
+ FROM observations
81
+ WHERE 1=1 ${projectFilter}
82
+ `).get(...baseParams);
83
+
84
+ return { windowRow, allTimeRow, typeRows, topLessons, purgeRow, project, days };
73
85
  }
74
86
 
75
87
  export function formatQualityReport(data) {
76
- const { windowRow, allTimeRow, typeRows, topLessons, project, days } = data;
88
+ const { windowRow, allTimeRow, typeRows, topLessons, purgeRow, project, days } = data;
77
89
  const pct = (n, d) => d > 0 ? (100 * n / d).toFixed(1) : '0.0';
78
90
  const scope = project ? ` — ${project}` : '';
79
91
  const lines = [];
@@ -126,5 +138,16 @@ export function formatQualityReport(data) {
126
138
  lines.push(` ${lessonStatus} Lesson rate ≥ 15% → currently ${lessonPct}% (gap ${lessonGap >= 0 ? '+' : ''}${lessonGap}pp)`);
127
139
  lines.push(` ${noiseStatus} LOW_SIGNAL ≤ 30% → currently ${noisePct}% (gap ${noiseGap >= 0 ? '+' : ''}${noiseGap}pp)`);
128
140
 
141
+ // Pending-purge ratio: fraction of compressed records still awaiting deletion.
142
+ // Compressed-but-not-yet-purged is normal (37d retention floor); a high ratio
143
+ // either means a recent write surge OR that auto-maintain isn't running.
144
+ if (purgeRow && (purgeRow.compressed ?? 0) > 0) {
145
+ const purgePct = pct(purgeRow.pending_purge, purgeRow.compressed);
146
+ const purgeNum = parseFloat(purgePct);
147
+ const purgeGap = (purgeNum - 10).toFixed(1);
148
+ const purgeStatus = purgeNum <= 10 ? '✅' : (purgeNum <= 30 ? '🟡' : '🔴');
149
+ lines.push(` ${purgeStatus} Pending purge ≤ 10% → currently ${purgePct}% (${purgeRow.pending_purge}/${purgeRow.compressed}) (gap ${purgeGap >= 0 ? '+' : ''}${purgeGap}pp)${purgeNum > 10 ? ' — run: claude-mem-lite maintain execute --ops purge_stale --confirm' : ''}`);
150
+ }
151
+
129
152
  return lines.join('\n');
130
153
  }
package/mem-cli.mjs CHANGED
@@ -4,13 +4,14 @@
4
4
 
5
5
  import { homedir } from 'os';
6
6
  import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
7
- import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, jaccardSimilarity, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, isoWeekKey, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, DEFAULT_DECAY_HALF_LIFE_MS, getCurrentBranch, notLowSignalTitleClause, LOW_SIGNAL_TITLE } from './utils.mjs';
7
+ import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, jaccardSimilarity, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, isoWeekKey, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, DEFAULT_DECAY_HALF_LIFE_MS, getCurrentBranch, notLowSignalTitleClause } from './utils.mjs';
8
8
  import { cjkPrecisionOk } from './nlp.mjs';
9
9
  import { extractCjkLikePatterns } from './nlp.mjs';
10
10
  import { resolveProject } from './project-utils.mjs';
11
11
  import { computeTier, TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
12
- import { getVocabulary, computeVector, vectorSearch, rrfMerge, VECTOR_SCAN_LIMIT, rebuildVocabulary, _resetVocabCache } from './tfidf.mjs';
13
- import { autoBoostIfNeeded, reRankWithContext, markSuperseded, extractPRFTerms, expandQueryByConcepts } from './server-internals.mjs';
12
+ import { getVocabulary, computeVector, rebuildVocabulary, _resetVocabCache } from './tfidf.mjs';
13
+ import { autoBoostIfNeeded, reRankWithContext, markSuperseded } from './server-internals.mjs';
14
+ import { searchObservationsHybrid } from './search-engine.mjs';
14
15
  import { ensureRegistryDb, upsertResource } from './registry.mjs';
15
16
  import { searchResources } from './registry-retriever.mjs';
16
17
  import { optimizePreview, optimizeRun } from './hook-optimize.mjs';
@@ -72,6 +73,7 @@ function cmdSearch(db, args) {
72
73
  // error logs, etc.) which are otherwise filtered from default search. Use for auditing or
73
74
  // when explicitly searching for a file/command that produced a degraded title.
74
75
  const includeNoise = flags['include-noise'] === true || flags['include-noise'] === 'true';
76
+ const jsonOutput = flags.json === true || flags.json === 'true';
75
77
 
76
78
  if (source && !['observations', 'sessions', 'prompts'].includes(source)) {
77
79
  fail(`[mem] Invalid --source "${source}". Use: observations, sessions, prompts`);
@@ -94,87 +96,42 @@ function cmdSearch(db, args) {
94
96
  // When --type/--tier/--importance (obs-only fields) is specified, implicitly restrict to observations
95
97
  const effectiveSource = source || ((type || tier || minImportance) ? 'observations' : null);
96
98
 
99
+ // Cross-source mode: each source needs more candidates than the final limit
100
+ // so the post-merge sort has room to pick the best from each (paired-path with
101
+ // server.mjs:377 — without this, obs gets systematically squeezed out by sessions).
102
+ const isCrossSourceMode = !effectiveSource;
103
+ const perSourceLimit = isCrossSourceMode ? Math.max(limit * 3, offset + limit + 10) : limit;
104
+ const perSourceOffset = isCrossSourceMode ? 0 : offset;
105
+
97
106
  const results = [];
98
107
  // Tracks whether AND returned 0 and OR recovered non-empty. Mirrors server.mjs
99
108
  // ctx.orFallbackFired so the header can surface a "(relaxed AND→OR)" hint.
100
109
  let orFallbackFired = false;
101
110
 
102
- // Search observations
111
+ // Search observations — shared engine with server.mjs (#8198/#8212 paired-path fix)
103
112
  if (!effectiveSource || effectiveSource === 'observations') {
104
- let obsRows = searchFts(db, ftsQuery, { type, project, limit, dateFrom, dateTo, minImportance, branch, includeNoise, offset: effectiveSource ? offset : 0 });
105
- if (obsRows.length === 0) {
106
- const orQuery = relaxFtsQueryToOr(ftsQuery);
107
- if (orQuery) {
108
- try {
109
- obsRows = searchFts(db, orQuery, { type, project, limit, dateFrom, dateTo, minImportance, branch, includeNoise, offset: effectiveSource ? offset : 0 });
110
- if (obsRows.length > 0) orFallbackFired = true;
111
- } catch {}
112
- }
113
- }
114
- // Type-list fallback
115
- if (obsRows.length === 0 && type) {
116
- const typeWheres = ['COALESCE(compressed_into, 0) = 0', 'superseded_at IS NULL', 'type = ?'];
117
- const typeParams = [type];
118
- if (project) { typeWheres.push('project = ?'); typeParams.push(project); }
119
- if (dateFrom) { typeWheres.push('created_at_epoch >= ?'); typeParams.push(dateFrom); }
120
- if (dateTo) { typeWheres.push('created_at_epoch <= ?'); typeParams.push(dateTo); }
121
- if (minImportance) { typeWheres.push('COALESCE(importance, 1) >= ?'); typeParams.push(minImportance); }
122
- if (branch) { typeWheres.push('branch = ?'); typeParams.push(branch); }
123
- typeParams.push(limit);
124
- obsRows = db.prepare(`
125
- SELECT id, type, title, subtitle, created_at, lesson_learned
126
- FROM observations
127
- WHERE ${typeWheres.join(' AND ')}
128
- ORDER BY created_at_epoch DESC
129
- LIMIT ?
130
- `).all(...typeParams);
131
- }
132
- for (const r of obsRows) results.push({ ...r, _source: 'obs', score: r.score ?? 0 });
133
-
134
- // Concept co-occurrence + PRF expansion (aligned with MCP searchObservations)
135
- if (obsRows.length > 0 && results.filter(r => r._source === 'obs').length < Math.ceil(limit / 2)) {
136
- const existingIds = new Set(results.filter(r => r._source === 'obs').map(r => r.id));
137
- // Concept co-occurrence expansion
138
- const expanded = expandQueryByConcepts(db, ftsQuery, project || null);
139
- if (expanded.length > 0) {
140
- const expansionFts = expanded.map(c => `"${c.replace(/"/g, '""')}"`).join(' OR ');
141
- try {
142
- const expRows = searchFts(db, expansionFts, { type, project, limit, dateFrom, dateTo, minImportance, branch, includeNoise, offset: 0 });
143
- for (const r of expRows) {
144
- if (!existingIds.has(r.id)) {
145
- existingIds.add(r.id);
146
- results.push({ ...r, _source: 'obs', score: (r.score ?? 0) * 0.7 });
147
- }
148
- }
149
- } catch { /* expansion is best-effort */ }
150
- }
151
- // PRF expansion (only if ≥3 primary results)
152
- if (obsRows.length >= 3) {
153
- const topResults = db.prepare(`
154
- SELECT o.title, o.narrative FROM observations_fts
155
- JOIN observations o ON observations_fts.rowid = o.id
156
- WHERE observations_fts MATCH ? AND COALESCE(o.compressed_into, 0) = 0
157
- AND (? IS NULL OR o.project = ?)
158
- ORDER BY ${OBS_BM25}
159
- LIMIT 8
160
- `).all(ftsQuery, project ?? null, project ?? null);
161
- const prfTerms = extractPRFTerms(topResults, ftsQuery);
162
- if (prfTerms.length > 0) {
163
- const prfFts = prfTerms.map(t => `"${t.replace(/"/g, '""')}"`).join(' OR ');
164
- try {
165
- const prfRows = searchFts(db, prfFts, { type, project, limit, dateFrom, dateTo, minImportance, branch, includeNoise, offset: 0 });
166
- for (const r of prfRows) {
167
- if (!existingIds.has(r.id)) {
168
- existingIds.add(r.id);
169
- results.push({ ...r, _source: 'obs', score: (r.score ?? 0) * 0.6 });
170
- }
171
- }
172
- } catch { /* PRF is best-effort */ }
173
- }
174
- }
175
- }
176
-
177
- // Tier post-filter — applied to ALL obs results (initial + expansion + PRF)
113
+ const obsCtx = {
114
+ ftsQuery,
115
+ args: {
116
+ project: project || null,
117
+ obs_type: type || null,
118
+ importance: minImportance || null,
119
+ branch: branch || null,
120
+ include_noise: includeNoise,
121
+ },
122
+ epochFrom: dateFrom,
123
+ epochTo: dateTo,
124
+ perSourceLimit,
125
+ perSourceOffset,
126
+ currentProject: project ? null : inferProject(),
127
+ limit,
128
+ orFallbackFired: false,
129
+ };
130
+ const obsResults = searchObservationsHybrid(db, obsCtx);
131
+ if (obsCtx.orFallbackFired) orFallbackFired = true;
132
+ for (const r of obsResults) results.push({ ...r, _source: 'obs', score: r.score ?? 0 });
133
+
134
+ // Tier post-filter applied to ALL obs results from the engine.
178
135
  if (tier) {
179
136
  const obsInResults = results.filter(r => r._source === 'obs');
180
137
  if (obsInResults.length > 0) {
@@ -205,7 +162,7 @@ function cmdSearch(db, args) {
205
162
  if (project) { sessWheres.push('s.project = ?'); sessParams.push(project); }
206
163
  if (dateFrom) { sessWheres.push('s.created_at_epoch >= ?'); sessParams.push(dateFrom); }
207
164
  if (dateTo) { sessWheres.push('s.created_at_epoch <= ?'); sessParams.push(dateTo); }
208
- sessParams.push(effectiveSource ? limit : limit, effectiveSource ? offset : 0);
165
+ sessParams.push(perSourceLimit, perSourceOffset);
209
166
  try {
210
167
  const sessRows = db.prepare(`
211
168
  SELECT s.id, s.request, s.completed, s.project, s.created_at, s.created_at_epoch,
@@ -229,7 +186,7 @@ function cmdSearch(db, args) {
229
186
  if (project) { promptWheres.push('s.project = ?'); promptParams.push(project); }
230
187
  if (dateFrom) { promptWheres.push('p.created_at_epoch >= ?'); promptParams.push(dateFrom); }
231
188
  if (dateTo) { promptWheres.push('p.created_at_epoch <= ?'); promptParams.push(dateTo); }
232
- promptParams.push(effectiveSource ? limit : limit, effectiveSource ? offset : 0);
189
+ promptParams.push(perSourceLimit, perSourceOffset);
233
190
  try {
234
191
  const promptRows = db.prepare(`
235
192
  SELECT p.id, p.prompt_text, p.content_session_id, p.created_at, p.created_at_epoch,
@@ -256,7 +213,7 @@ function cmdSearch(db, args) {
256
213
  if (project) likeParams.push(project);
257
214
  if (dateFrom) likeParams.push(dateFrom);
258
215
  if (dateTo) likeParams.push(dateTo);
259
- likeParams.push(effectiveSource ? limit : limit, effectiveSource ? offset : 0);
216
+ likeParams.push(perSourceLimit, perSourceOffset);
260
217
  const fallbackRows = db.prepare(`
261
218
  SELECT p.id, p.prompt_text, p.content_session_id, p.created_at, p.created_at_epoch
262
219
  FROM user_prompts p
@@ -281,13 +238,18 @@ function cmdSearch(db, args) {
281
238
  }
282
239
 
283
240
  if (results.length === 0) {
284
- out(`[mem] No results for "${query}"`);
241
+ if (jsonOutput) {
242
+ out(JSON.stringify({ query, total: 0, returned: 0, offset, limit, results: [] }));
243
+ } else {
244
+ out(`[mem] No results for "${query}"`);
245
+ }
285
246
  return;
286
247
  }
287
248
 
288
- // Cross-source score normalization (aligned with MCP mem_search)
289
- const isCrossSource = !effectiveSource;
290
- if (isCrossSource && results.length > 0) {
249
+ // Cross-source score normalization (paired-path with server.mjs:428).
250
+ // ftsQuery gate prevents normalization when scores are all 0 (no-FTS path).
251
+ const isCrossSource = isCrossSourceMode;
252
+ if (isCrossSource && results.length > 0 && ftsQuery) {
291
253
  for (const src of ['obs', 'session', 'prompt']) {
292
254
  const srcResults = results.filter(r => r._source === src && r.score !== null && r.score !== undefined);
293
255
  if (srcResults.length < 2) continue;
@@ -318,18 +280,63 @@ function cmdSearch(db, args) {
318
280
  // else 'relevance' keeps BM25 score order (already sorted)
319
281
 
320
282
  // Trim to limit with offset
283
+ const total = results.length;
321
284
  const paged = results.slice(offset, offset + limit);
322
285
 
323
286
  if (paged.length === 0) {
324
- out(`[mem] No results for "${query}" at offset ${offset}`);
287
+ if (jsonOutput) {
288
+ out(JSON.stringify({ query, total, returned: 0, offset, limit, results: [] }));
289
+ } else {
290
+ out(`[mem] No results for "${query}" at offset ${offset}`);
291
+ }
325
292
  return;
326
293
  }
327
294
 
295
+ // paired-path with server.mjs formatSearchOutput (#8198): "N of M" total when paged < total.
328
296
  const showTime = sort === 'time';
329
297
  const hasMixed = paged.some(r => r._source === 'session' || r._source === 'prompt');
330
298
  // Suppressed when --or was explicit — user already asked for OR, no "fallback" there.
331
299
  const fallbackHint = orFallbackFired && !useOr ? ' (relaxed AND→OR)' : '';
332
- out(`[mem] ${paged.length} result${paged.length !== 1 ? 's' : ''} for "${query}"${fallbackHint}:${hasMixed ? ' (# observation, S# session, P# prompt)' : ''}`);
300
+
301
+ if (jsonOutput) {
302
+ const items = paged.map(r => {
303
+ const base = {
304
+ source: r._source,
305
+ id: r.id,
306
+ created_at: r.created_at,
307
+ score: r.score ?? null,
308
+ };
309
+ if (r._source === 'session') {
310
+ return { ...base, request: r.request || null, completed: r.completed || null, project: r.project || null };
311
+ }
312
+ if (r._source === 'prompt') {
313
+ return { ...base, prompt_text: r.prompt_text || null };
314
+ }
315
+ return {
316
+ ...base,
317
+ type: r.type,
318
+ title: r.title || r.subtitle || null,
319
+ lesson_learned: r.lesson_learned || null,
320
+ importance: r.importance ?? null,
321
+ superseded: Boolean(r.superseded),
322
+ files_modified: r.files_modified || null,
323
+ };
324
+ });
325
+ out(JSON.stringify({
326
+ query,
327
+ total,
328
+ returned: paged.length,
329
+ offset,
330
+ limit,
331
+ relaxed_and_to_or: orFallbackFired && !useOr,
332
+ mixed_sources: hasMixed,
333
+ results: items,
334
+ }));
335
+ return;
336
+ }
337
+
338
+ const countLabel = total > paged.length ? `${paged.length} of ${total}` : `${paged.length}`;
339
+ out(`[mem] Found ${countLabel} result${paged.length !== 1 ? 's' : ''} for "${query}"${fallbackHint}:${hasMixed ? ' (# observation, S# session, P# prompt)' : ''}`);
333
340
  for (const r of paged) {
334
341
  const timeStr = showTime && r.created_at_epoch ? ` (${relativeTime(r.created_at_epoch)})` : '';
335
342
  if (r._source === 'session') {
@@ -350,115 +357,6 @@ function cmdSearch(db, args) {
350
357
  }
351
358
  }
352
359
 
353
- function searchFts(db, ftsQuery, { type, project, limit, dateFrom, dateTo, minImportance, branch, includeNoise, offset }) {
354
- const now = Date.now();
355
- // Current project for boost (2× when no explicit project filter)
356
- const currentProject = !project ? inferProject() : null;
357
-
358
- // WHERE clause params (positional ? in SQL order)
359
- const whereParams = [ftsQuery];
360
- const wheres = [
361
- 'observations_fts MATCH ?',
362
- 'COALESCE(o.compressed_into, 0) = 0',
363
- 'o.superseded_at IS NULL',
364
- ];
365
- if (project) { wheres.push('o.project = ?'); whereParams.push(project); }
366
- if (type) { wheres.push('o.type = ?'); whereParams.push(type); }
367
- if (dateFrom) { wheres.push('o.created_at_epoch >= ?'); whereParams.push(dateFrom); }
368
- if (dateTo) { wheres.push('o.created_at_epoch <= ?'); whereParams.push(dateTo); }
369
- if (minImportance) { wheres.push('COALESCE(o.importance, 1) >= ?'); whereParams.push(minImportance); }
370
- if (branch) { wheres.push('o.branch = ?'); whereParams.push(branch); }
371
- // R-1: exclude hook-llm fallback titles ("Modified X", "Worked on X", raw error logs)
372
- // from default search. They compete for BM25 rank but have ~3% access rate. Mirrors the
373
- // filter already applied in hook-memory.mjs, hook-context.mjs, and user-prompt-search.js.
374
- // Use --include-noise to audit them.
375
- if (!includeNoise) wheres.push(notLowSignalTitleClause('o'));
376
-
377
- // Param order: SELECT scoring (now, proj, proj) → WHERE (ftsQuery, filters...) → ORDER BY scoring (now, proj, proj) → LIMIT/OFFSET
378
- const scoreParams = [now, currentProject, currentProject];
379
- const params = [...scoreParams, ...whereParams, ...scoreParams, limit, offset || 0];
380
-
381
- // Scoring aligned with server.mjs: BM25 × type-decay × type-quality × project_boost × importance × access_bonus × lesson-boost
382
- // R-3: lesson_learned presence adds a +0.3 multiplier (empirical: +6.3pp hit-rate lift on bugfix).
383
- const ftsRows = db.prepare(`
384
- SELECT o.id, o.type, o.title, o.subtitle, o.created_at, o.created_at_epoch, o.lesson_learned,
385
- o.files_modified, o.importance,
386
- ${OBS_BM25}
387
- * (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
388
- * ${TYPE_QUALITY_CASE}
389
- * (CASE WHEN ? IS NOT NULL AND o.project = ? THEN 2.0 ELSE 1.0 END)
390
- * (0.5 + 0.5 * COALESCE(o.importance, 1))
391
- * (1.0 + 0.1 * LN(1 + COALESCE(o.access_count, 0)))
392
- * (1.0 + 0.3 * (o.lesson_learned IS NOT NULL)) as score
393
- FROM observations_fts
394
- JOIN observations o ON observations_fts.rowid = o.id
395
- WHERE ${wheres.join(' AND ')}
396
- ORDER BY ${OBS_BM25}
397
- * (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
398
- * ${TYPE_QUALITY_CASE}
399
- * (CASE WHEN ? IS NOT NULL AND o.project = ? THEN 2.0 ELSE 1.0 END)
400
- * (0.5 + 0.5 * COALESCE(o.importance, 1))
401
- * (1.0 + 0.1 * LN(1 + COALESCE(o.access_count, 0)))
402
- * (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))
403
- LIMIT ? OFFSET ?
404
- `).all(...params);
405
-
406
- // Hybrid: vector search + RRF merge (best-effort)
407
- try {
408
- const vocab = getVocabulary(db);
409
- if (vocab) {
410
- const queryText = ftsQuery.replace(/['"()]/g, ' ');
411
- const queryVec = computeVector(queryText, vocab);
412
- if (queryVec) {
413
- const vecResults = vectorSearch(db, queryVec, {
414
- project: project || null,
415
- vocabVersion: vocab.version,
416
- limit: VECTOR_SCAN_LIMIT,
417
- });
418
- if (vecResults.length > 0 && ftsRows.length > 0) {
419
- const rrfRanking = rrfMerge(ftsRows, vecResults);
420
- const rowMap = new Map(ftsRows.map(r => [r.id, r]));
421
- for (const vr of vecResults) {
422
- if (!rowMap.has(vr.id)) {
423
- const obs = db.prepare('SELECT id, type, title, subtitle, created_at, created_at_epoch, lesson_learned, importance, branch, files_modified FROM observations WHERE id = ?').get(vr.id);
424
- if (obs) {
425
- // Apply same filters as FTS5 query (aligned with MCP searchObservations)
426
- if (dateFrom && obs.created_at_epoch < dateFrom) continue;
427
- if (dateTo && obs.created_at_epoch > dateTo) continue;
428
- if (minImportance && (obs.importance ?? 1) < minImportance) continue;
429
- if (branch && obs.branch !== branch) continue;
430
- // R-1: LOW_SIGNAL filter also applies to vector-side additions (the SQL
431
- // clause only filtered the FTS5 side) so RRF can't re-admit noise.
432
- if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) continue;
433
- rowMap.set(vr.id, obs);
434
- }
435
- }
436
- }
437
- return rrfRanking
438
- .filter(rr => rowMap.has(rr.id))
439
- .map(rr => rowMap.get(rr.id))
440
- .slice(0, limit);
441
- } else if (vecResults.length > 0 && ftsRows.length === 0) {
442
- return vecResults
443
- .map(vr => db.prepare('SELECT id, type, title, subtitle, created_at, created_at_epoch, lesson_learned, importance, branch FROM observations WHERE id = ?').get(vr.id))
444
- .filter(obs => {
445
- if (!obs) return false;
446
- if (dateFrom && obs.created_at_epoch < dateFrom) return false;
447
- if (dateTo && obs.created_at_epoch > dateTo) return false;
448
- if (minImportance && (obs.importance ?? 1) < minImportance) return false;
449
- if (branch && obs.branch !== branch) return false;
450
- if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) return false;
451
- return true;
452
- })
453
- .slice(0, limit);
454
- }
455
- }
456
- }
457
- } catch { /* vector search is best-effort */ }
458
-
459
- return ftsRows;
460
- }
461
-
462
360
  function cmdRecent(db, args) {
463
361
  const { positional, flags } = parseArgs(args);
464
362
  const rawArg = positional[0];
@@ -1985,6 +1883,7 @@ Commands:
1985
1883
  --sort S Sort: relevance (default), time, importance
1986
1884
  --or Use OR instead of AND between search terms
1987
1885
  --include-noise Include hook-llm fallback titles ("Modified X", raw error logs)
1886
+ --json Output as JSON: {query,total,returned,offset,limit,results:[…]}
1988
1887
 
1989
1888
  recent [N] Show N most recent observations (default 10)
1990
1889
  --project P Filter by project
@@ -2002,7 +1901,11 @@ Commands:
2002
1901
  timeline Show observations around an anchor (shows recent if no anchor)
2003
1902
  --anchor ID Center on this ID. Accepts N, #N, P#N, or S#N — P#/S# anchors
2004
1903
  resolve to the nearest-in-time observation in the same project.
2005
- --query "text" Find anchor by FTS5 search
1904
+ --query "text" Find anchor by FTS5 search. Ranks by BM25 × time-decay,
1905
+ so multi-term queries surface the BEST topical match
1906
+ (highest term coverage), not the most recent. For
1907
+ "recent activity around X", use 'recent' or
1908
+ 'search "X" --sort time' instead.
2006
1909
  --before N Show N before anchor (default 5)
2007
1910
  --after N Show N after anchor (default 5)
2008
1911
  --project P Filter by project
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.51.0",
3
+ "version": "2.53.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code",
5
5
  "type": "module",
6
6
  "engines": {
@@ -26,6 +26,7 @@
26
26
  "mem-cli.mjs",
27
27
  "server.mjs",
28
28
  "server-internals.mjs",
29
+ "search-engine.mjs",
29
30
  "hook.mjs",
30
31
  "hook-shared.mjs",
31
32
  "hook-llm.mjs",
@@ -0,0 +1,249 @@
1
+ // Shared observation-search engine — the single source of truth for
2
+ // hybrid FTS5 + vector ranking, OR fallback, concept/PRF expansion, and
3
+ // RRF merge. Both server.mjs (mem_search MCP tool) and mem-cli.mjs (search CLI)
4
+ // import these helpers so identical queries return identical candidate sets
5
+ // and rankings. See #8198 / #8212 for the prior paired-path divergence this
6
+ // module exists to eliminate.
7
+
8
+ import {
9
+ OBS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE,
10
+ notLowSignalTitleClause, LOW_SIGNAL_TITLE,
11
+ relaxFtsQueryToOr, debugLog, debugCatch,
12
+ } from './utils.mjs';
13
+ import { getVocabulary, computeVector, vectorSearch, rrfMerge } from './tfidf.mjs';
14
+ import { extractPRFTerms, expandQueryByConcepts } from './server-internals.mjs';
15
+
16
+ // Scoring expressions — full adds project boost + access bonus; simple is for
17
+ // expansion paths where boost would over-amplify already-loose matches.
18
+ const FULL_SCORE = `${OBS_BM25}
19
+ * (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
20
+ * ${TYPE_QUALITY_CASE}
21
+ * (CASE WHEN ? IS NOT NULL AND o.project = ? THEN 2.0 ELSE 1.0 END)
22
+ * (0.5 + 0.5 * COALESCE(o.importance, 1))
23
+ * (1.0 + 0.1 * LN(1 + COALESCE(o.access_count, 0)))
24
+ * (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))`;
25
+
26
+ const SIMPLE_SCORE = `${OBS_BM25}
27
+ * (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
28
+ * ${TYPE_QUALITY_CASE}
29
+ * (0.5 + 0.5 * COALESCE(o.importance, 1))
30
+ * (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))`;
31
+
32
+ export function buildObsFtsQuery(scoring, { multiplier, withSnippet, withOffset, includeNoise } = {}) {
33
+ const scoreExpr = scoring === 'full' ? FULL_SCORE : SIMPLE_SCORE;
34
+ const mult = multiplier ? ` * ${multiplier}` : '';
35
+ const lowSignalClause = includeNoise ? '' : `AND ${notLowSignalTitleClause('o')}`;
36
+ return `
37
+ SELECT o.id, o.type, o.title, o.subtitle, o.project, o.created_at, o.created_at_epoch, o.importance,
38
+ o.files_modified, o.lesson_learned,
39
+ ${withSnippet ? "snippet(observations_fts, 2, '»', '«', '…', 10) as match_snippet," : ''}
40
+ ${scoreExpr}${mult} as score
41
+ FROM observations_fts
42
+ JOIN observations o ON observations_fts.rowid = o.id
43
+ WHERE observations_fts MATCH ?
44
+ AND COALESCE(o.compressed_into, 0) = 0
45
+ AND o.superseded_at IS NULL
46
+ AND (? IS NULL OR o.project = ?)
47
+ AND (? IS NULL OR o.type = ?)
48
+ AND (? IS NULL OR o.created_at_epoch >= ?)
49
+ AND (? IS NULL OR o.created_at_epoch <= ?)
50
+ AND (? IS NULL OR COALESCE(o.importance, 1) >= ?)
51
+ AND (? IS NULL OR o.branch = ?)
52
+ ${lowSignalClause}
53
+ ORDER BY score
54
+ LIMIT ?${withOffset ? ' OFFSET ?' : ''}`;
55
+ }
56
+
57
+ export function buildObsFtsParams({ now, projectBoost, ftsQuery, args, epochFrom, epochTo, limit, offset }) {
58
+ const params = [now];
59
+ if (projectBoost !== undefined) params.push(projectBoost, projectBoost);
60
+ params.push(
61
+ ftsQuery,
62
+ args.project ?? null, args.project ?? null,
63
+ args.obs_type ?? null, args.obs_type ?? null,
64
+ epochFrom, epochFrom,
65
+ epochTo, epochTo,
66
+ args.importance ?? null, args.importance ?? null,
67
+ args.branch ?? null, args.branch ?? null,
68
+ limit,
69
+ );
70
+ if (offset !== undefined) params.push(offset);
71
+ return params;
72
+ }
73
+
74
+ export function ftsRowToResult(r, { scoreMultiplier, snippet } = {}) {
75
+ return {
76
+ source: 'obs', id: r.id, type: r.type, title: r.title, subtitle: r.subtitle,
77
+ project: r.project, date: r.created_at, created_at_epoch: r.created_at_epoch,
78
+ score: scoreMultiplier ? r.score * scoreMultiplier : r.score,
79
+ files_modified: r.files_modified, importance: r.importance, lesson_learned: r.lesson_learned,
80
+ snippet: snippet ? (r.match_snippet || '') : '',
81
+ };
82
+ }
83
+
84
+ function expandObsByConceptCo(db, ctx, now, existingIds, results, includeNoise = false) {
85
+ const { ftsQuery, args, epochFrom, epochTo, limit } = ctx;
86
+ if (results.length >= Math.ceil(limit / 2)) return;
87
+ const expanded = expandQueryByConcepts(db, ftsQuery, args.project);
88
+ if (expanded.length === 0) return;
89
+ const expansionFts = expanded.map(c => `"${c.replace(/"/g, '""')}"`).join(' OR ');
90
+ try {
91
+ const expRows = db.prepare(buildObsFtsQuery('simple', { includeNoise }))
92
+ .all(...buildObsFtsParams({ now, ftsQuery: expansionFts, args, epochFrom, epochTo, limit }));
93
+ for (const r of expRows) {
94
+ if (!existingIds.has(r.id)) {
95
+ existingIds.add(r.id);
96
+ results.push(ftsRowToResult(r, { scoreMultiplier: 0.7 }));
97
+ }
98
+ }
99
+ } catch (e) { debugLog('WARN', 'search-engine', `concept expansion error: ${e.message}`); }
100
+ }
101
+
102
+ function expandObsByPRF(db, ctx, now, primaryCount, existingIds, results, includeNoise = false) {
103
+ const { ftsQuery, args, epochFrom, epochTo, limit } = ctx;
104
+ if (primaryCount < 3) return;
105
+ const topResults = db.prepare(`
106
+ SELECT o.title, o.narrative FROM observations_fts
107
+ JOIN observations o ON observations_fts.rowid = o.id
108
+ WHERE observations_fts MATCH ? AND COALESCE(o.compressed_into, 0) = 0
109
+ AND (? IS NULL OR o.project = ?)
110
+ ORDER BY ${OBS_BM25}
111
+ LIMIT 8
112
+ `).all(ftsQuery, args.project ?? null, args.project ?? null);
113
+ const prfTerms = extractPRFTerms(topResults, ftsQuery);
114
+ if (prfTerms.length === 0) return;
115
+ const prfFts = prfTerms.map(t => `"${t.replace(/"/g, '""')}"`).join(' OR ');
116
+ try {
117
+ const prfRows = db.prepare(buildObsFtsQuery('simple', { includeNoise }))
118
+ .all(...buildObsFtsParams({ now, ftsQuery: prfFts, args, epochFrom, epochTo, limit }));
119
+ for (const r of prfRows) {
120
+ if (!existingIds.has(r.id)) {
121
+ existingIds.add(r.id);
122
+ results.push(ftsRowToResult(r, { scoreMultiplier: 0.6 }));
123
+ }
124
+ }
125
+ } catch (e) { debugLog('WARN', 'search-engine', `PRF expansion error: ${e.message}`); }
126
+ }
127
+
128
+ /**
129
+ * Hybrid observation search — single source of truth for FTS + vector + RRF.
130
+ *
131
+ * Pipeline (paired-path with mem-cli.mjs cmdSearch via this module):
132
+ * 1. FTS5 BM25 query (full scoring)
133
+ * 2. OR fallback when AND returned 0 → sets ctx.orFallbackFired
134
+ * 3. Concept co-occurrence expansion (when results sparse)
135
+ * 4. PRF (pseudo-relevance feedback) expansion
136
+ * 5. Vector search + RRF merge (re-ranks all results when both modes have hits)
137
+ * 6. Vector-only fallback (when FTS5 found nothing)
138
+ *
139
+ * @param {Database} db - better-sqlite3 instance
140
+ * @param {object} ctx - { ftsQuery, args, epochFrom, epochTo, perSourceLimit,
141
+ * perSourceOffset, currentProject, limit, orFallbackFired }
142
+ * @returns {Array} list of result objects (mutated ctx may set orFallbackFired)
143
+ */
144
+ export function searchObservationsHybrid(db, ctx) {
145
+ const { ftsQuery, args, epochFrom, epochTo, perSourceLimit, perSourceOffset, currentProject, limit } = ctx;
146
+ const results = [];
147
+ const includeNoise = args.include_noise === true;
148
+
149
+ if (!ftsQuery) {
150
+ const params = [];
151
+ const wheres = ['COALESCE(compressed_into, 0) = 0', 'superseded_at IS NULL'];
152
+ if (args.project) { wheres.push('project = ?'); params.push(args.project); }
153
+ if (args.obs_type) { wheres.push('type = ?'); params.push(args.obs_type); }
154
+ if (epochFrom !== null) { wheres.push('created_at_epoch >= ?'); params.push(epochFrom); }
155
+ if (epochTo !== null) { wheres.push('created_at_epoch <= ?'); params.push(epochTo); }
156
+ if (args.importance) { wheres.push('COALESCE(importance, 1) >= ?'); params.push(args.importance); }
157
+ if (args.branch) { wheres.push('branch = ?'); params.push(args.branch); }
158
+ const where = `WHERE ${wheres.join(' AND ')}`;
159
+ params.push(perSourceLimit, perSourceOffset);
160
+ const rows = db.prepare(`
161
+ SELECT id, type, title, subtitle, project, created_at, created_at_epoch, files_modified, importance, lesson_learned
162
+ FROM observations ${where}
163
+ ORDER BY created_at_epoch DESC
164
+ LIMIT ? OFFSET ?
165
+ `).all(...params);
166
+ for (const r of rows) {
167
+ results.push({ source: 'obs', id: r.id, type: r.type, title: r.title, subtitle: r.subtitle, project: r.project, date: r.created_at, created_at_epoch: r.created_at_epoch, files_modified: r.files_modified, importance: r.importance, lesson_learned: r.lesson_learned });
168
+ }
169
+ return results;
170
+ }
171
+
172
+ const now = Date.now();
173
+ const projectBoost = args.project ? null : currentProject;
174
+
175
+ const rows = db.prepare(buildObsFtsQuery('full', { withSnippet: true, withOffset: true, includeNoise }))
176
+ .all(...buildObsFtsParams({ now, projectBoost, ftsQuery, args, epochFrom, epochTo, limit: perSourceLimit, offset: perSourceOffset }));
177
+ for (const r of rows) results.push(ftsRowToResult(r, { snippet: true }));
178
+
179
+ // OR fallback — must run BEFORE vector merge so orFallbackFired reflects FTS-only state.
180
+ if (rows.length === 0) {
181
+ const orQuery = relaxFtsQueryToOr(ftsQuery);
182
+ if (orQuery) {
183
+ try {
184
+ const orRows = db.prepare(buildObsFtsQuery('full', { multiplier: 0.5, withSnippet: true, withOffset: true, includeNoise }))
185
+ .all(...buildObsFtsParams({ now, projectBoost, ftsQuery: orQuery, args, epochFrom, epochTo, limit: perSourceLimit, offset: perSourceOffset }));
186
+ if (orRows.length > 0) ctx.orFallbackFired = true;
187
+ for (const r of orRows) results.push(ftsRowToResult(r, { snippet: true }));
188
+ } catch (e) { debugCatch(e, 'searchObservationsHybrid-or-fallback'); }
189
+ }
190
+ }
191
+
192
+ // Two-phase query expansion (only when well below limit)
193
+ if (rows.length > 0 && results.length < Math.ceil(limit / 2)) {
194
+ const existingIds = new Set(results.map(r => r.id));
195
+ expandObsByConceptCo(db, ctx, now, existingIds, results, includeNoise);
196
+ expandObsByPRF(db, ctx, now, rows.length, existingIds, results, includeNoise);
197
+ }
198
+
199
+ // Vector search + RRF hybrid merge
200
+ try {
201
+ const vocab = getVocabulary(db);
202
+ if (!vocab) return results;
203
+ const queryText = ftsQuery.replace(/['"()]/g, ' ');
204
+ const queryVec = computeVector(queryText, vocab);
205
+ if (!queryVec) return results;
206
+ const vecResults = vectorSearch(db, queryVec, {
207
+ project: args.project ?? null,
208
+ type: args.obs_type ?? null,
209
+ vocabVersion: vocab.version,
210
+ });
211
+ if (vecResults.length === 0) return results;
212
+
213
+ if (results.length > 0) {
214
+ const rrfRanking = rrfMerge(results, vecResults);
215
+ const resultMap = new Map(results.map(r => [r.id, r]));
216
+ for (const vr of vecResults) {
217
+ if (!resultMap.has(vr.id)) {
218
+ const obs = db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, importance, files_modified, branch, lesson_learned FROM observations WHERE id = ?').get(vr.id);
219
+ if (!obs) continue;
220
+ if (epochFrom !== null && obs.created_at_epoch < epochFrom) continue;
221
+ if (epochTo !== null && obs.created_at_epoch > epochTo) continue;
222
+ if (args.importance && (obs.importance ?? 1) < args.importance) continue;
223
+ if (args.branch && obs.branch !== args.branch) continue;
224
+ if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) continue;
225
+ resultMap.set(vr.id, { source: 'obs', id: obs.id, type: obs.type, title: obs.title, subtitle: obs.subtitle, project: obs.project, date: obs.created_at, importance: obs.importance, files_modified: obs.files_modified, lesson_learned: obs.lesson_learned, snippet: '' });
226
+ }
227
+ }
228
+ const reordered = rrfRanking
229
+ .filter(rr => resultMap.has(rr.id))
230
+ .map(rr => ({ ...resultMap.get(rr.id), score: -rr.rrfScore }));
231
+ results.length = 0;
232
+ results.push(...reordered);
233
+ } else {
234
+ // FTS5 found nothing but vector found results
235
+ for (const vr of vecResults) {
236
+ const obs = db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, importance, files_modified, branch FROM observations WHERE id = ?').get(vr.id);
237
+ if (!obs) continue;
238
+ if (epochFrom !== null && obs.created_at_epoch < epochFrom) continue;
239
+ if (epochTo !== null && obs.created_at_epoch > epochTo) continue;
240
+ if (args.importance && (obs.importance ?? 1) < args.importance) continue;
241
+ if (args.branch && obs.branch !== args.branch) continue;
242
+ if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) continue;
243
+ results.push({ source: 'obs', id: obs.id, type: obs.type, title: obs.title, subtitle: obs.subtitle, project: obs.project, date: obs.created_at, importance: obs.importance, files_modified: obs.files_modified, lesson_learned: obs.lesson_learned, score: -vr.similarity, snippet: '' });
244
+ }
245
+ }
246
+ } catch (e) { debugCatch(e, 'searchObservationsHybrid-vector'); }
247
+
248
+ return results;
249
+ }
package/server.mjs CHANGED
@@ -5,11 +5,12 @@
5
5
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
6
6
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
7
7
  import { ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
8
- import { jaccardSimilarity, truncate, typeIcon, sanitizeFtsQuery, relaxFtsQueryToOr, inferProject, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, fmtDate, isoWeekKey, debugLog, debugCatch, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, getCurrentBranch, DEFAULT_DECAY_HALF_LIFE_MS, isPathConfined, notLowSignalTitleClause, LOW_SIGNAL_TITLE } from './utils.mjs';
8
+ import { jaccardSimilarity, truncate, typeIcon, sanitizeFtsQuery, relaxFtsQueryToOr, inferProject, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, fmtDate, isoWeekKey, debugLog, debugCatch, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, getCurrentBranch, DEFAULT_DECAY_HALF_LIFE_MS, isPathConfined, notLowSignalTitleClause } from './utils.mjs';
9
9
  import { extractCjkLikePatterns, cjkPrecisionOk } from './nlp.mjs';
10
10
  import { resolveProject as _resolveProjectShared } from './project-utils.mjs';
11
11
  import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
12
- import { reRankWithContext, markSuperseded, extractPRFTerms, expandQueryByConcepts, autoBoostIfNeeded, runIdleCleanup, buildServerInstructions } from './server-internals.mjs';
12
+ import { reRankWithContext, markSuperseded, autoBoostIfNeeded, runIdleCleanup, buildServerInstructions } from './server-internals.mjs';
13
+ import { searchObservationsHybrid } from './search-engine.mjs';
13
14
  import { effectiveQuiet } from './hook-shared.mjs';
14
15
  import { computeTier, TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
15
16
  import { memSearchSchema, memRecentSchema, memTimelineSchema, memGetSchema, memDeleteSchema, memSaveSchema, memStatsSchema, memCompressSchema, memMaintainSchema, memOptimizeSchema, memUpdateSchema, memExportSchema, memRecallSchema, memFtsCheckSchema, memRegistrySchema, memBrowseSchema, memUseSchema, tools as TOOL_DEFS } from './tool-schemas.mjs';
@@ -28,7 +29,7 @@ import { homedir } from 'os';
28
29
  import { ensureRegistryDb, upsertResource } from './registry.mjs';
29
30
  import { searchResources } from './registry-retriever.mjs';
30
31
  import { probeOtherSources as probeIdSources, parseIdToken, bucketIdTokens } from './lib/id-routing.mjs';
31
- import { getVocabulary, rebuildVocabulary, _resetVocabCache, computeVector, vectorSearch, rrfMerge } from './tfidf.mjs';
32
+ import { getVocabulary, rebuildVocabulary, _resetVocabCache, computeVector } from './tfidf.mjs';
32
33
  import { createRequire } from 'module';
33
34
 
34
35
  const require = createRequire(import.meta.url);
@@ -145,241 +146,13 @@ function safeHandler(fn) {
145
146
  // TYPE_DECAY_CASE imported from utils.mjs
146
147
 
147
148
  // Score expression variants for FTS5 queries (see Scoring Model Constants above)
148
- // TYPE_QUALITY_CASE demotes bugfix (×0.6) and promotes decision/discovery (×1.5/1.3)
149
- // R-3: lesson_learned presence adds ×1.3 boost — empirical +6.3pp hit-rate lift on bugfix.
150
- const FULL_SCORE = `${OBS_BM25}
151
- * (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
152
- * ${TYPE_QUALITY_CASE}
153
- * (CASE WHEN ? IS NOT NULL AND o.project = ? THEN 2.0 ELSE 1.0 END)
154
- * (0.5 + 0.5 * COALESCE(o.importance, 1))
155
- * (1.0 + 0.1 * LN(1 + COALESCE(o.access_count, 0)))
156
- * (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))`;
157
-
158
- const SIMPLE_SCORE = `${OBS_BM25}
159
- * (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
160
- * ${TYPE_QUALITY_CASE}
161
- * (0.5 + 0.5 * COALESCE(o.importance, 1))
162
- * (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))`;
163
-
164
- /**
165
- * Build an FTS5 observation search query.
166
- * @param {'full'|'simple'} scoring - full includes project boost + access bonus
167
- * @param {object} opts - { multiplier, withSnippet, withOffset, includeNoise }
168
- * includeNoise=true keeps hook-llm fallback titles ("Modified X", "Worked on X", etc.);
169
- * default false mirrors the filter already applied in hook-memory.mjs / user-prompt-search.js.
170
- */
171
- function buildObsFtsQuery(scoring, { multiplier, withSnippet, withOffset, includeNoise } = {}) {
172
- const scoreExpr = scoring === 'full' ? FULL_SCORE : SIMPLE_SCORE;
173
- const mult = multiplier ? ` * ${multiplier}` : '';
174
- const lowSignalClause = includeNoise ? '' : `AND ${notLowSignalTitleClause('o')}`;
175
- return `
176
- SELECT o.id, o.type, o.title, o.subtitle, o.project, o.created_at, o.created_at_epoch, o.importance,
177
- o.files_modified,
178
- ${withSnippet ? "snippet(observations_fts, 2, '»', '«', '…', 10) as match_snippet," : ''}
179
- ${scoreExpr}${mult} as score
180
- FROM observations_fts
181
- JOIN observations o ON observations_fts.rowid = o.id
182
- WHERE observations_fts MATCH ?
183
- AND COALESCE(o.compressed_into, 0) = 0
184
- AND o.superseded_at IS NULL
185
- AND (? IS NULL OR o.project = ?)
186
- AND (? IS NULL OR o.type = ?)
187
- AND (? IS NULL OR o.created_at_epoch >= ?)
188
- AND (? IS NULL OR o.created_at_epoch <= ?)
189
- AND (? IS NULL OR COALESCE(o.importance, 1) >= ?)
190
- AND (? IS NULL OR o.branch = ?)
191
- ${lowSignalClause}
192
- ORDER BY score
193
- LIMIT ?${withOffset ? ' OFFSET ?' : ''}`;
194
- }
195
-
196
- /** Build params array for an FTS5 observation query. */
197
- function buildObsFtsParams({ now, projectBoost, ftsQuery, args, epochFrom, epochTo, limit, offset }) {
198
- const params = [now];
199
- if (projectBoost !== undefined) params.push(projectBoost, projectBoost); // full scoring only
200
- params.push(
201
- ftsQuery,
202
- args.project ?? null, args.project ?? null,
203
- args.obs_type ?? null, args.obs_type ?? null,
204
- epochFrom, epochFrom,
205
- epochTo, epochTo,
206
- args.importance ?? null, args.importance ?? null,
207
- args.branch ?? null, args.branch ?? null,
208
- limit,
209
- );
210
- if (offset !== undefined) params.push(offset);
211
- return params;
212
- }
213
-
214
- /** Map a raw FTS5 row to a result object. */
215
- function ftsRowToResult(r, { scoreMultiplier, snippet } = {}) {
216
- return {
217
- source: 'obs', id: r.id, type: r.type, title: r.title, subtitle: r.subtitle,
218
- project: r.project, date: r.created_at, created_at_epoch: r.created_at_epoch,
219
- score: scoreMultiplier ? r.score * scoreMultiplier : r.score,
220
- files_modified: r.files_modified, importance: r.importance, snippet: snippet ? (r.match_snippet || '') : '',
221
- };
222
- }
149
+ // Observation-search core (FTS query/params builders, hybrid pipeline) lives in
150
+ // search-engine.mjs so mem-cli.mjs gets the identical implementation.
223
151
 
152
+ // Thin wrapper around the shared engine — keeps the existing call sites
153
+ // (searchObservations(ctx)) without ferrying `db` through every layer.
224
154
  function searchObservations(ctx) {
225
- const { ftsQuery, args, epochFrom, epochTo, perSourceLimit, perSourceOffset, currentProject, limit } = ctx;
226
- const results = [];
227
- // R-1: hide hook-llm fallback titles unless caller explicitly opts in via include_noise=true.
228
- const includeNoise = args.include_noise === true;
229
-
230
- if (ftsQuery) {
231
- const now = Date.now();
232
- const projectBoost = args.project ? null : currentProject;
233
-
234
- const rows = db.prepare(buildObsFtsQuery('full', { withSnippet: true, withOffset: true, includeNoise }))
235
- .all(...buildObsFtsParams({ now, projectBoost, ftsQuery, args, epochFrom, epochTo, limit: perSourceLimit, offset: perSourceOffset }));
236
- for (const r of rows) results.push(ftsRowToResult(r, { snippet: true }));
237
-
238
- // OR fallback: when AND query returns 0 results, retry with OR semantics.
239
- // Sets ctx.orFallbackFired so the top-level formatter can surface a "relaxed
240
- // AND→OR" hint — without it, callers can't distinguish a strict multi-term
241
- // match from a partial single-term recovery.
242
- if (rows.length === 0) {
243
- const orQuery = relaxFtsQueryToOr(ftsQuery);
244
- if (orQuery) {
245
- try {
246
- const orRows = db.prepare(buildObsFtsQuery('full', { multiplier: 0.5, withSnippet: true, withOffset: true, includeNoise }))
247
- .all(...buildObsFtsParams({ now, projectBoost, ftsQuery: orQuery, args, epochFrom, epochTo, limit: perSourceLimit, offset: perSourceOffset }));
248
- if (orRows.length > 0) ctx.orFallbackFired = true;
249
- for (const r of orRows) results.push(ftsRowToResult(r, { snippet: true }));
250
- } catch (e) { debugCatch(e, 'searchObservations-or-fallback'); }
251
- }
252
- }
253
-
254
- // Two-phase query expansion for sparse results (only when well below limit)
255
- if (rows.length > 0 && results.length < Math.ceil(limit / 2)) {
256
- const existingIds = new Set(results.map(r => r.id));
257
- expandObsByConceptCo(ctx, now, existingIds, results, includeNoise);
258
- expandObsByPRF(ctx, now, rows.length, existingIds, results, includeNoise);
259
- }
260
-
261
- // Vector search + RRF hybrid merge
262
- try {
263
- const vocab = getVocabulary(db);
264
- if (vocab) {
265
- const queryText = ftsQuery.replace(/['"()]/g, ' ');
266
- const queryVec = computeVector(queryText, vocab);
267
- if (queryVec) {
268
- const vecResults = vectorSearch(db, queryVec, {
269
- project: args.project ?? null,
270
- type: args.obs_type ?? null,
271
- vocabVersion: vocab.version,
272
- });
273
- if (vecResults.length > 0 && results.length > 0) {
274
- // RRF merge: combine BM25 ranked results with vector ranked results
275
- const rrfRanking = rrfMerge(results, vecResults);
276
- const resultMap = new Map(results.map(r => [r.id, r]));
277
- // Add vector-only results (found by similarity but not by FTS5)
278
- for (const vr of vecResults) {
279
- if (!resultMap.has(vr.id)) {
280
- const obs = db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, importance, files_modified, branch FROM observations WHERE id = ?').get(vr.id);
281
- if (obs) {
282
- // Apply same filter constraints as FTS5
283
- if (epochFrom !== null && obs.created_at_epoch < epochFrom) continue;
284
- if (epochTo !== null && obs.created_at_epoch > epochTo) continue;
285
- if (args.importance && (obs.importance ?? 1) < args.importance) continue;
286
- if (args.branch && obs.branch !== args.branch) continue;
287
- // R-1: parity with FTS5 WHERE — vector path must also reject LOW_SIGNAL titles
288
- // so RRF cannot re-admit what the SQL clause excluded.
289
- if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) continue;
290
- resultMap.set(vr.id, { source: 'obs', id: obs.id, type: obs.type, title: obs.title, subtitle: obs.subtitle, project: obs.project, date: obs.created_at, importance: obs.importance, files_modified: obs.files_modified, snippet: '' });
291
- }
292
- }
293
- }
294
- // Re-order by RRF score
295
- const reordered = rrfRanking
296
- .filter(rr => resultMap.has(rr.id))
297
- .map(rr => ({ ...resultMap.get(rr.id), score: -rr.rrfScore })); // negative for BM25-compatible sort
298
- results.length = 0;
299
- results.push(...reordered);
300
- } else if (vecResults.length > 0 && results.length === 0) {
301
- // FTS5 found nothing but vector found results
302
- for (const vr of vecResults) {
303
- const obs = db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, importance, files_modified, branch FROM observations WHERE id = ?').get(vr.id);
304
- if (!obs) continue;
305
- if (epochFrom !== null && obs.created_at_epoch < epochFrom) continue;
306
- if (epochTo !== null && obs.created_at_epoch > epochTo) continue;
307
- if (args.importance && (obs.importance ?? 1) < args.importance) continue;
308
- if (args.branch && obs.branch !== args.branch) continue;
309
- if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) continue;
310
- results.push({ source: 'obs', id: obs.id, type: obs.type, title: obs.title, subtitle: obs.subtitle, project: obs.project, date: obs.created_at, importance: obs.importance, files_modified: obs.files_modified, score: -vr.similarity, snippet: '' });
311
- }
312
- }
313
- }
314
- }
315
- } catch (e) { debugCatch(e, 'searchObservations-vector'); }
316
- } else {
317
- const params = [];
318
- const wheres = ['COALESCE(compressed_into, 0) = 0', 'superseded_at IS NULL'];
319
- if (args.project) { wheres.push('project = ?'); params.push(args.project); }
320
- if (args.obs_type) { wheres.push('type = ?'); params.push(args.obs_type); }
321
- if (epochFrom !== null) { wheres.push('created_at_epoch >= ?'); params.push(epochFrom); }
322
- if (epochTo !== null) { wheres.push('created_at_epoch <= ?'); params.push(epochTo); }
323
- if (args.importance) { wheres.push('COALESCE(importance, 1) >= ?'); params.push(args.importance); }
324
- if (args.branch) { wheres.push('branch = ?'); params.push(args.branch); }
325
- const where = `WHERE ${wheres.join(' AND ')}`;
326
- params.push(perSourceLimit, perSourceOffset);
327
- const rows = db.prepare(`
328
- SELECT id, type, title, subtitle, project, created_at, created_at_epoch, files_modified, importance
329
- FROM observations ${where}
330
- ORDER BY created_at_epoch DESC
331
- LIMIT ? OFFSET ?
332
- `).all(...params);
333
- for (const r of rows) {
334
- results.push({ source: 'obs', id: r.id, type: r.type, title: r.title, subtitle: r.subtitle, project: r.project, date: r.created_at, created_at_epoch: r.created_at_epoch, files_modified: r.files_modified, importance: r.importance });
335
- }
336
- }
337
-
338
- return results;
339
- }
340
-
341
- function expandObsByConceptCo(ctx, now, existingIds, results, includeNoise = false) {
342
- const { ftsQuery, args, epochFrom, epochTo, limit } = ctx;
343
- if (results.length >= Math.ceil(limit / 2)) return;
344
- const expanded = expandQueryByConcepts(db, ftsQuery, args.project);
345
- if (expanded.length === 0) return;
346
- const expansionFts = expanded.map(c => `"${c.replace(/"/g, '""')}"`).join(' OR ');
347
- try {
348
- const expRows = db.prepare(buildObsFtsQuery('simple', { includeNoise }))
349
- .all(...buildObsFtsParams({ now, ftsQuery: expansionFts, args, epochFrom, epochTo, limit }));
350
- for (const r of expRows) {
351
- if (!existingIds.has(r.id)) {
352
- existingIds.add(r.id);
353
- results.push(ftsRowToResult(r, { scoreMultiplier: 0.7 }));
354
- }
355
- }
356
- } catch (e) { debugLog('WARN', 'mem_search', `concept expansion error: ${e.message}`); }
357
- }
358
-
359
- function expandObsByPRF(ctx, now, primaryCount, existingIds, results, includeNoise = false) {
360
- const { ftsQuery, args, epochFrom, epochTo, limit } = ctx;
361
- if (primaryCount < 3) return;
362
- const topResults = db.prepare(`
363
- SELECT o.title, o.narrative FROM observations_fts
364
- JOIN observations o ON observations_fts.rowid = o.id
365
- WHERE observations_fts MATCH ? AND COALESCE(o.compressed_into, 0) = 0
366
- AND (? IS NULL OR o.project = ?)
367
- ORDER BY ${OBS_BM25}
368
- LIMIT 8
369
- `).all(ftsQuery, args.project ?? null, args.project ?? null);
370
- const prfTerms = extractPRFTerms(topResults, ftsQuery);
371
- if (prfTerms.length === 0) return;
372
- const prfFts = prfTerms.map(t => `"${t.replace(/"/g, '""')}"`).join(' OR ');
373
- try {
374
- const prfRows = db.prepare(buildObsFtsQuery('simple', { includeNoise }))
375
- .all(...buildObsFtsParams({ now, ftsQuery: prfFts, args, epochFrom, epochTo, limit }));
376
- for (const r of prfRows) {
377
- if (!existingIds.has(r.id)) {
378
- existingIds.add(r.id);
379
- results.push(ftsRowToResult(r, { scoreMultiplier: 0.6 }));
380
- }
381
- }
382
- } catch (e) { debugLog('WARN', 'mem_search', `PRF expansion error: ${e.message}`); }
155
+ return searchObservationsHybrid(db, ctx);
383
156
  }
384
157
 
385
158
  function searchSessions(ctx) {
package/source-files.mjs CHANGED
@@ -6,7 +6,7 @@
6
6
 
7
7
  export const SOURCE_FILES = [
8
8
  // Entry points and top-level modules
9
- 'cli.mjs', 'server.mjs', 'server-internals.mjs', 'tool-schemas.mjs',
9
+ 'cli.mjs', 'server.mjs', 'server-internals.mjs', 'search-engine.mjs', 'tool-schemas.mjs',
10
10
  'hook.mjs', 'hook-shared.mjs', 'hook-llm.mjs', 'hook-memory.mjs', 'skip-tools.mjs',
11
11
  'hook-semaphore.mjs', 'hook-episode.mjs', 'hook-context.mjs', 'hook-handoff.mjs',
12
12
  'hook-update.mjs', 'hook-optimize.mjs',
package/tool-schemas.mjs CHANGED
@@ -318,11 +318,13 @@ export const tools = [
318
318
  name: 'mem_timeline',
319
319
  description:
320
320
  'Show observations before and after an anchor point (by ID or by FTS query).\n' +
321
+ 'Query-anchor ranks by BM25 × time-decay → BEST topical match, not most recent.\n' +
321
322
  '\n' +
322
323
  'DO NOT use when:\n' +
323
324
  ' - You only want one record (use mem_get)\n' +
324
325
  ' - You have no anchor in mind and are just browsing (use mem_recent or mem_browse)\n' +
325
326
  ' - The sequence is obvious from commit history (use git log)\n' +
327
+ ' - You want "recent activity around X" (use mem_recent or mem_search sort="time")\n' +
326
328
  '\n' +
327
329
  'USE when:\n' +
328
330
  ' - Reconstructing what led up to / followed a specific bug or decision\n' +