claude-mem-lite 2.51.0 → 2.53.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/cli/doctor.mjs +26 -1
- package/hook-context.mjs +9 -4
- package/hook.mjs +52 -1
- package/lib/stats-quality.mjs +25 -2
- package/mem-cli.mjs +100 -197
- package/package.json +2 -1
- package/search-engine.mjs +249 -0
- package/server.mjs +9 -236
- package/source-files.mjs +1 -1
- package/tool-schemas.mjs +2 -0
package/cli/doctor.mjs
CHANGED
|
@@ -12,7 +12,32 @@ export async function cmdDoctor(db, args) {
|
|
|
12
12
|
if (args.includes('--benchmark')) {
|
|
13
13
|
const { runBenchmark } = await import('../lib/doctor-benchmark.mjs');
|
|
14
14
|
const project = inferProject();
|
|
15
|
-
|
|
15
|
+
// Sample recent user prompts so the CLI report has non-null injection_rate
|
|
16
|
+
// and hook latency. Without this, runBenchmark's prompts default of [] makes
|
|
17
|
+
// every metric 0/null — a dead command from the user's perspective. Tests
|
|
18
|
+
// bypass this CLI layer and call runBenchmark() directly, so the lib API
|
|
19
|
+
// contract (default prompts=[]) is unchanged.
|
|
20
|
+
let prompts = [];
|
|
21
|
+
try {
|
|
22
|
+
const limitIdx = args.indexOf('--prompts-limit');
|
|
23
|
+
let limit = 50;
|
|
24
|
+
if (limitIdx >= 0 && args[limitIdx + 1]) {
|
|
25
|
+
const parsed = parseInt(args[limitIdx + 1], 10);
|
|
26
|
+
if (Number.isFinite(parsed) && parsed > 0 && parsed <= 1000) limit = parsed;
|
|
27
|
+
}
|
|
28
|
+
const rows = db.prepare(`
|
|
29
|
+
SELECT p.prompt_text
|
|
30
|
+
FROM user_prompts p
|
|
31
|
+
JOIN sdk_sessions s ON p.content_session_id = s.content_session_id
|
|
32
|
+
WHERE s.project = ?
|
|
33
|
+
AND p.prompt_text IS NOT NULL
|
|
34
|
+
AND length(p.prompt_text) >= 15
|
|
35
|
+
ORDER BY p.created_at_epoch DESC
|
|
36
|
+
LIMIT ?
|
|
37
|
+
`).all(project, limit);
|
|
38
|
+
prompts = rows.map(r => r.prompt_text).filter(Boolean);
|
|
39
|
+
} catch { /* missing/empty tables on a fresh DB → leave prompts=[] */ }
|
|
40
|
+
const result = runBenchmark(db, { project, prompts });
|
|
16
41
|
out(JSON.stringify(result, null, 2));
|
|
17
42
|
return;
|
|
18
43
|
}
|
package/hook-context.mjs
CHANGED
|
@@ -369,19 +369,24 @@ export function buildSessionContextLines(db, project, now = new Date(), currentC
|
|
|
369
369
|
// 5. Working state from latest /clear handoff.
|
|
370
370
|
// Session scoping: when currentCcSessionId is provided, restrict to this session's
|
|
371
371
|
// own clear handoff so parallel sessions don't see each other's Working State block.
|
|
372
|
+
// TTL: drop handoffs older than 48h. Without it, `cmdContext` (no session id) would
|
|
373
|
+
// surface a /clear from days ago as "current Working State" — confusing when the user
|
|
374
|
+
// has long moved on. 48h covers overnight breaks but excludes truly stale state.
|
|
375
|
+
const HANDOFF_TTL_MS = 48 * 60 * 60 * 1000;
|
|
376
|
+
const handoffMinEpoch = Date.now() - HANDOFF_TTL_MS;
|
|
372
377
|
const prevClearHandoff = currentCcSessionId
|
|
373
378
|
? db.prepare(`
|
|
374
379
|
SELECT working_on, unfinished, key_files
|
|
375
380
|
FROM session_handoffs
|
|
376
|
-
WHERE project = ? AND type = 'clear' AND session_id = ?
|
|
381
|
+
WHERE project = ? AND type = 'clear' AND session_id = ? AND created_at_epoch > ?
|
|
377
382
|
ORDER BY created_at_epoch DESC LIMIT 1
|
|
378
|
-
`).get(project, currentCcSessionId)
|
|
383
|
+
`).get(project, currentCcSessionId, handoffMinEpoch)
|
|
379
384
|
: db.prepare(`
|
|
380
385
|
SELECT working_on, unfinished, key_files
|
|
381
386
|
FROM session_handoffs
|
|
382
|
-
WHERE project = ? AND type = 'clear'
|
|
387
|
+
WHERE project = ? AND type = 'clear' AND created_at_epoch > ?
|
|
383
388
|
ORDER BY created_at_epoch DESC LIMIT 1
|
|
384
|
-
`).get(project);
|
|
389
|
+
`).get(project, handoffMinEpoch);
|
|
385
390
|
|
|
386
391
|
const handoffLines = [];
|
|
387
392
|
if (prevClearHandoff) {
|
package/hook.mjs
CHANGED
|
@@ -27,6 +27,7 @@ import {
|
|
|
27
27
|
extractErrorKeywords, extractFilePaths, isRelatedToEpisode,
|
|
28
28
|
makeEntryDesc, scrubSecrets, EDIT_TOOLS, debugCatch, debugLog,
|
|
29
29
|
COMPRESSED_AUTO, COMPRESSED_PENDING_PURGE, isoWeekKey, OBS_BM25,
|
|
30
|
+
computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity,
|
|
30
31
|
} from './utils.mjs';
|
|
31
32
|
import {
|
|
32
33
|
readEpisodeRaw, episodeFile,
|
|
@@ -747,7 +748,8 @@ async function handleSessionStart() {
|
|
|
747
748
|
`).run();
|
|
748
749
|
if (boosted.changes > 0) debugLog('DEBUG', 'auto-maintain', `boosted ${boosted.changes} frequently-accessed observations`);
|
|
749
750
|
|
|
750
|
-
// Auto-dedup: merge
|
|
751
|
+
// Auto-dedup (exact): merge identical-title observations within 1h.
|
|
752
|
+
// Catches rapid duplicate writes (same hook firing twice, race conditions).
|
|
751
753
|
const dupPairs = db.prepare(`
|
|
752
754
|
SELECT a.id as keep_id, b.id as remove_id
|
|
753
755
|
FROM observations a
|
|
@@ -765,6 +767,55 @@ async function handleSessionStart() {
|
|
|
765
767
|
debugLog('DEBUG', 'auto-maintain', `auto-deduped ${dupPairs.length} near-identical observations`);
|
|
766
768
|
}
|
|
767
769
|
|
|
770
|
+
// Auto-dedup (fuzzy): catches near-identical titles that exact-match
|
|
771
|
+
// misses across larger time windows — e.g. episode-batch titles like
|
|
772
|
+
// "Modified A.mjs, B.mjs" vs "Modified B.mjs, A.mjs" written days apart.
|
|
773
|
+
// MinHash pre-filter (≥0.7) skips the exact Jaccard check for most pairs (the pairwise scan itself is still O(N²), bounded by SCAN_LIMIT); Jaccard ≥0.95 stays
|
|
774
|
+
// well clear of legit "two updates same area" pairs (those typically
|
|
775
|
+
// score 0.7–0.85, surfaced via `maintain scan` for manual review).
|
|
776
|
+
// Bounded by SCAN_LIMIT (500) recent rows × FUZZY_MAX_MERGES (20) merge cap.
|
|
777
|
+
if (!process.env.CLAUDE_MEM_SKIP_AUTO_DEDUP_FUZZY) {
|
|
778
|
+
const SCAN_LIMIT = 500;
|
|
779
|
+
const FUZZY_MAX_MERGES = 20;
|
|
780
|
+
const FUZZY_THRESHOLD = 0.95;
|
|
781
|
+
const MINHASH_PREFILTER = 0.7;
|
|
782
|
+
const recent = db.prepare(`
|
|
783
|
+
SELECT id, title, importance, created_at_epoch
|
|
784
|
+
FROM observations
|
|
785
|
+
WHERE COALESCE(compressed_into, 0) = 0
|
|
786
|
+
AND superseded_at IS NULL
|
|
787
|
+
AND created_at_epoch > ?
|
|
788
|
+
AND title IS NOT NULL AND title != ''
|
|
789
|
+
ORDER BY created_at_epoch DESC LIMIT ${SCAN_LIMIT}
|
|
790
|
+
`).all(STALE_AGE);
|
|
791
|
+
if (recent.length >= 2) {
|
|
792
|
+
const titles = recent.map(r => r.title.trim());
|
|
793
|
+
const minhashes = titles.map(t => t ? computeMinHash(t) : null);
|
|
794
|
+
const fuzzyRemoveIds = [];
|
|
795
|
+
const removed = new Set();
|
|
796
|
+
outer: for (let i = 0; i < recent.length; i++) {
|
|
797
|
+
if (!minhashes[i] || removed.has(recent[i].id)) continue;
|
|
798
|
+
for (let j = i + 1; j < recent.length; j++) {
|
|
799
|
+
if (!minhashes[j] || removed.has(recent[j].id)) continue;
|
|
800
|
+
if (estimateJaccardFromMinHash(minhashes[i], minhashes[j]) < MINHASH_PREFILTER) continue;
|
|
801
|
+
if (jaccardSimilarity(titles[i], titles[j]) < FUZZY_THRESHOLD) continue;
|
|
802
|
+
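// Keep the higher-importance row; on a tie the >= keeps recent[i], i.e. the newer row (rows are sorted created_at_epoch DESC). NOTE(review): the stated intent was "older wins, to preserve access history" — confirm which is wanted.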
|
|
803
|
+
const keep = (recent[i].importance ?? 1) >= (recent[j].importance ?? 1) ? recent[i] : recent[j];
|
|
804
|
+
const remove = keep === recent[i] ? recent[j] : recent[i];
|
|
805
|
+
fuzzyRemoveIds.push(remove.id);
|
|
806
|
+
removed.add(remove.id);
|
|
807
|
+
if (fuzzyRemoveIds.length >= FUZZY_MAX_MERGES) break outer;
|
|
808
|
+
}
|
|
809
|
+
}
|
|
810
|
+
if (fuzzyRemoveIds.length > 0) {
|
|
811
|
+
const ph = fuzzyRemoveIds.map(() => '?').join(',');
|
|
812
|
+
db.prepare(`UPDATE observations SET superseded_at = ?, superseded_by = 'auto-dedup-fuzzy' WHERE id IN (${ph})`)
|
|
813
|
+
.run(Date.now(), ...fuzzyRemoveIds);
|
|
814
|
+
debugLog('DEBUG', 'auto-maintain', `fuzzy auto-deduped ${fuzzyRemoveIds.length} near-identical observations`);
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
}
|
|
818
|
+
|
|
768
819
|
// Mark maintenance as done (24h gate) — even though compression runs in background
|
|
769
820
|
writeFileSync(maintainFile, JSON.stringify({ epoch: Date.now() }));
|
|
770
821
|
// Weekly summary grouping runs in background to avoid blocking SessionStart
|
package/lib/stats-quality.mjs
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
import { notLowSignalTitleClause } from '../scoring-sql.mjs';
|
|
7
7
|
import { truncate } from '../format-utils.mjs';
|
|
8
|
+
import { COMPRESSED_PENDING_PURGE } from '../utils.mjs';
|
|
8
9
|
|
|
9
10
|
export function computeQualityStats(db, { project, days }) {
|
|
10
11
|
const projectFilter = project ? 'AND project = ?' : '';
|
|
@@ -69,11 +70,22 @@ export function computeQualityStats(db, { project, days }) {
|
|
|
69
70
|
LIMIT 5
|
|
70
71
|
`).all(...baseParams);
|
|
71
72
|
|
|
72
|
-
|
|
73
|
+
// Pending-purge backlog: compressed records waiting on the time-based purge gate.
|
|
74
|
+
// High ratio signals push/pull imbalance — auto-mark fires daily but purge needs
|
|
75
|
+
// age > 37d, so a sudden write surge inflates this until the cohort ages out.
|
|
76
|
+
const purgeRow = db.prepare(`
|
|
77
|
+
SELECT
|
|
78
|
+
SUM(CASE WHEN compressed_into IS NOT NULL AND compressed_into != 0 THEN 1 ELSE 0 END) as compressed,
|
|
79
|
+
SUM(CASE WHEN compressed_into = ${COMPRESSED_PENDING_PURGE} THEN 1 ELSE 0 END) as pending_purge
|
|
80
|
+
FROM observations
|
|
81
|
+
WHERE 1=1 ${projectFilter}
|
|
82
|
+
`).get(...baseParams);
|
|
83
|
+
|
|
84
|
+
return { windowRow, allTimeRow, typeRows, topLessons, purgeRow, project, days };
|
|
73
85
|
}
|
|
74
86
|
|
|
75
87
|
export function formatQualityReport(data) {
|
|
76
|
-
const { windowRow, allTimeRow, typeRows, topLessons, project, days } = data;
|
|
88
|
+
const { windowRow, allTimeRow, typeRows, topLessons, purgeRow, project, days } = data;
|
|
77
89
|
const pct = (n, d) => d > 0 ? (100 * n / d).toFixed(1) : '0.0';
|
|
78
90
|
const scope = project ? ` — ${project}` : '';
|
|
79
91
|
const lines = [];
|
|
@@ -126,5 +138,16 @@ export function formatQualityReport(data) {
|
|
|
126
138
|
lines.push(` ${lessonStatus} Lesson rate ≥ 15% → currently ${lessonPct}% (gap ${lessonGap >= 0 ? '+' : ''}${lessonGap}pp)`);
|
|
127
139
|
lines.push(` ${noiseStatus} LOW_SIGNAL ≤ 30% → currently ${noisePct}% (gap ${noiseGap >= 0 ? '+' : ''}${noiseGap}pp)`);
|
|
128
140
|
|
|
141
|
+
// Pending-purge ratio: fraction of compressed records still awaiting deletion.
|
|
142
|
+
// Compressed-but-not-yet-purged is normal (37d retention floor); a high ratio
|
|
143
|
+
// either means a recent write surge OR that auto-maintain isn't running.
|
|
144
|
+
if (purgeRow && (purgeRow.compressed ?? 0) > 0) {
|
|
145
|
+
const purgePct = pct(purgeRow.pending_purge, purgeRow.compressed);
|
|
146
|
+
const purgeNum = parseFloat(purgePct);
|
|
147
|
+
const purgeGap = (purgeNum - 10).toFixed(1);
|
|
148
|
+
const purgeStatus = purgeNum <= 10 ? '✅' : (purgeNum <= 30 ? '🟡' : '🔴');
|
|
149
|
+
lines.push(` ${purgeStatus} Pending purge ≤ 10% → currently ${purgePct}% (${purgeRow.pending_purge}/${purgeRow.compressed}) (gap ${purgeGap >= 0 ? '+' : ''}${purgeGap}pp)${purgeNum > 10 ? ' — run: claude-mem-lite maintain execute --ops purge_stale --confirm' : ''}`);
|
|
150
|
+
}
|
|
151
|
+
|
|
129
152
|
return lines.join('\n');
|
|
130
153
|
}
|
package/mem-cli.mjs
CHANGED
|
@@ -4,13 +4,14 @@
|
|
|
4
4
|
|
|
5
5
|
import { homedir } from 'os';
|
|
6
6
|
import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
|
|
7
|
-
import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, jaccardSimilarity, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, isoWeekKey, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25,
|
|
7
|
+
import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, jaccardSimilarity, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, isoWeekKey, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, DEFAULT_DECAY_HALF_LIFE_MS, getCurrentBranch, notLowSignalTitleClause } from './utils.mjs';
|
|
8
8
|
import { cjkPrecisionOk } from './nlp.mjs';
|
|
9
9
|
import { extractCjkLikePatterns } from './nlp.mjs';
|
|
10
10
|
import { resolveProject } from './project-utils.mjs';
|
|
11
11
|
import { computeTier, TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
|
|
12
|
-
import { getVocabulary, computeVector,
|
|
13
|
-
import { autoBoostIfNeeded, reRankWithContext, markSuperseded
|
|
12
|
+
import { getVocabulary, computeVector, rebuildVocabulary, _resetVocabCache } from './tfidf.mjs';
|
|
13
|
+
import { autoBoostIfNeeded, reRankWithContext, markSuperseded } from './server-internals.mjs';
|
|
14
|
+
import { searchObservationsHybrid } from './search-engine.mjs';
|
|
14
15
|
import { ensureRegistryDb, upsertResource } from './registry.mjs';
|
|
15
16
|
import { searchResources } from './registry-retriever.mjs';
|
|
16
17
|
import { optimizePreview, optimizeRun } from './hook-optimize.mjs';
|
|
@@ -72,6 +73,7 @@ function cmdSearch(db, args) {
|
|
|
72
73
|
// error logs, etc.) which are otherwise filtered from default search. Use for auditing or
|
|
73
74
|
// when explicitly searching for a file/command that produced a degraded title.
|
|
74
75
|
const includeNoise = flags['include-noise'] === true || flags['include-noise'] === 'true';
|
|
76
|
+
const jsonOutput = flags.json === true || flags.json === 'true';
|
|
75
77
|
|
|
76
78
|
if (source && !['observations', 'sessions', 'prompts'].includes(source)) {
|
|
77
79
|
fail(`[mem] Invalid --source "${source}". Use: observations, sessions, prompts`);
|
|
@@ -94,87 +96,42 @@ function cmdSearch(db, args) {
|
|
|
94
96
|
// When --type/--tier/--importance (obs-only fields) is specified, implicitly restrict to observations
|
|
95
97
|
const effectiveSource = source || ((type || tier || minImportance) ? 'observations' : null);
|
|
96
98
|
|
|
99
|
+
// Cross-source mode: each source needs more candidates than the final limit
|
|
100
|
+
// so the post-merge sort has room to pick the best from each (paired-path with
|
|
101
|
+
// server.mjs:377 — without this, obs gets systematically squeezed out by sessions).
|
|
102
|
+
const isCrossSourceMode = !effectiveSource;
|
|
103
|
+
const perSourceLimit = isCrossSourceMode ? Math.max(limit * 3, offset + limit + 10) : limit;
|
|
104
|
+
const perSourceOffset = isCrossSourceMode ? 0 : offset;
|
|
105
|
+
|
|
97
106
|
const results = [];
|
|
98
107
|
// Tracks whether AND returned 0 and OR recovered non-empty. Mirrors server.mjs
|
|
99
108
|
// ctx.orFallbackFired so the header can surface a "(relaxed AND→OR)" hint.
|
|
100
109
|
let orFallbackFired = false;
|
|
101
110
|
|
|
102
|
-
// Search observations
|
|
111
|
+
// Search observations — shared engine with server.mjs (#8198/#8212 paired-path fix)
|
|
103
112
|
if (!effectiveSource || effectiveSource === 'observations') {
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
FROM observations
|
|
127
|
-
WHERE ${typeWheres.join(' AND ')}
|
|
128
|
-
ORDER BY created_at_epoch DESC
|
|
129
|
-
LIMIT ?
|
|
130
|
-
`).all(...typeParams);
|
|
131
|
-
}
|
|
132
|
-
for (const r of obsRows) results.push({ ...r, _source: 'obs', score: r.score ?? 0 });
|
|
133
|
-
|
|
134
|
-
// Concept co-occurrence + PRF expansion (aligned with MCP searchObservations)
|
|
135
|
-
if (obsRows.length > 0 && results.filter(r => r._source === 'obs').length < Math.ceil(limit / 2)) {
|
|
136
|
-
const existingIds = new Set(results.filter(r => r._source === 'obs').map(r => r.id));
|
|
137
|
-
// Concept co-occurrence expansion
|
|
138
|
-
const expanded = expandQueryByConcepts(db, ftsQuery, project || null);
|
|
139
|
-
if (expanded.length > 0) {
|
|
140
|
-
const expansionFts = expanded.map(c => `"${c.replace(/"/g, '""')}"`).join(' OR ');
|
|
141
|
-
try {
|
|
142
|
-
const expRows = searchFts(db, expansionFts, { type, project, limit, dateFrom, dateTo, minImportance, branch, includeNoise, offset: 0 });
|
|
143
|
-
for (const r of expRows) {
|
|
144
|
-
if (!existingIds.has(r.id)) {
|
|
145
|
-
existingIds.add(r.id);
|
|
146
|
-
results.push({ ...r, _source: 'obs', score: (r.score ?? 0) * 0.7 });
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
} catch { /* expansion is best-effort */ }
|
|
150
|
-
}
|
|
151
|
-
// PRF expansion (only if ≥3 primary results)
|
|
152
|
-
if (obsRows.length >= 3) {
|
|
153
|
-
const topResults = db.prepare(`
|
|
154
|
-
SELECT o.title, o.narrative FROM observations_fts
|
|
155
|
-
JOIN observations o ON observations_fts.rowid = o.id
|
|
156
|
-
WHERE observations_fts MATCH ? AND COALESCE(o.compressed_into, 0) = 0
|
|
157
|
-
AND (? IS NULL OR o.project = ?)
|
|
158
|
-
ORDER BY ${OBS_BM25}
|
|
159
|
-
LIMIT 8
|
|
160
|
-
`).all(ftsQuery, project ?? null, project ?? null);
|
|
161
|
-
const prfTerms = extractPRFTerms(topResults, ftsQuery);
|
|
162
|
-
if (prfTerms.length > 0) {
|
|
163
|
-
const prfFts = prfTerms.map(t => `"${t.replace(/"/g, '""')}"`).join(' OR ');
|
|
164
|
-
try {
|
|
165
|
-
const prfRows = searchFts(db, prfFts, { type, project, limit, dateFrom, dateTo, minImportance, branch, includeNoise, offset: 0 });
|
|
166
|
-
for (const r of prfRows) {
|
|
167
|
-
if (!existingIds.has(r.id)) {
|
|
168
|
-
existingIds.add(r.id);
|
|
169
|
-
results.push({ ...r, _source: 'obs', score: (r.score ?? 0) * 0.6 });
|
|
170
|
-
}
|
|
171
|
-
}
|
|
172
|
-
} catch { /* PRF is best-effort */ }
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
// Tier post-filter — applied to ALL obs results (initial + expansion + PRF)
|
|
113
|
+
const obsCtx = {
|
|
114
|
+
ftsQuery,
|
|
115
|
+
args: {
|
|
116
|
+
project: project || null,
|
|
117
|
+
obs_type: type || null,
|
|
118
|
+
importance: minImportance || null,
|
|
119
|
+
branch: branch || null,
|
|
120
|
+
include_noise: includeNoise,
|
|
121
|
+
},
|
|
122
|
+
epochFrom: dateFrom,
|
|
123
|
+
epochTo: dateTo,
|
|
124
|
+
perSourceLimit,
|
|
125
|
+
perSourceOffset,
|
|
126
|
+
currentProject: project ? null : inferProject(),
|
|
127
|
+
limit,
|
|
128
|
+
orFallbackFired: false,
|
|
129
|
+
};
|
|
130
|
+
const obsResults = searchObservationsHybrid(db, obsCtx);
|
|
131
|
+
if (obsCtx.orFallbackFired) orFallbackFired = true;
|
|
132
|
+
for (const r of obsResults) results.push({ ...r, _source: 'obs', score: r.score ?? 0 });
|
|
133
|
+
|
|
134
|
+
// Tier post-filter — applied to ALL obs results from the engine.
|
|
178
135
|
if (tier) {
|
|
179
136
|
const obsInResults = results.filter(r => r._source === 'obs');
|
|
180
137
|
if (obsInResults.length > 0) {
|
|
@@ -205,7 +162,7 @@ function cmdSearch(db, args) {
|
|
|
205
162
|
if (project) { sessWheres.push('s.project = ?'); sessParams.push(project); }
|
|
206
163
|
if (dateFrom) { sessWheres.push('s.created_at_epoch >= ?'); sessParams.push(dateFrom); }
|
|
207
164
|
if (dateTo) { sessWheres.push('s.created_at_epoch <= ?'); sessParams.push(dateTo); }
|
|
208
|
-
sessParams.push(
|
|
165
|
+
sessParams.push(perSourceLimit, perSourceOffset);
|
|
209
166
|
try {
|
|
210
167
|
const sessRows = db.prepare(`
|
|
211
168
|
SELECT s.id, s.request, s.completed, s.project, s.created_at, s.created_at_epoch,
|
|
@@ -229,7 +186,7 @@ function cmdSearch(db, args) {
|
|
|
229
186
|
if (project) { promptWheres.push('s.project = ?'); promptParams.push(project); }
|
|
230
187
|
if (dateFrom) { promptWheres.push('p.created_at_epoch >= ?'); promptParams.push(dateFrom); }
|
|
231
188
|
if (dateTo) { promptWheres.push('p.created_at_epoch <= ?'); promptParams.push(dateTo); }
|
|
232
|
-
promptParams.push(
|
|
189
|
+
promptParams.push(perSourceLimit, perSourceOffset);
|
|
233
190
|
try {
|
|
234
191
|
const promptRows = db.prepare(`
|
|
235
192
|
SELECT p.id, p.prompt_text, p.content_session_id, p.created_at, p.created_at_epoch,
|
|
@@ -256,7 +213,7 @@ function cmdSearch(db, args) {
|
|
|
256
213
|
if (project) likeParams.push(project);
|
|
257
214
|
if (dateFrom) likeParams.push(dateFrom);
|
|
258
215
|
if (dateTo) likeParams.push(dateTo);
|
|
259
|
-
likeParams.push(
|
|
216
|
+
likeParams.push(perSourceLimit, perSourceOffset);
|
|
260
217
|
const fallbackRows = db.prepare(`
|
|
261
218
|
SELECT p.id, p.prompt_text, p.content_session_id, p.created_at, p.created_at_epoch
|
|
262
219
|
FROM user_prompts p
|
|
@@ -281,13 +238,18 @@ function cmdSearch(db, args) {
|
|
|
281
238
|
}
|
|
282
239
|
|
|
283
240
|
if (results.length === 0) {
|
|
284
|
-
|
|
241
|
+
if (jsonOutput) {
|
|
242
|
+
out(JSON.stringify({ query, total: 0, returned: 0, offset, limit, results: [] }));
|
|
243
|
+
} else {
|
|
244
|
+
out(`[mem] No results for "${query}"`);
|
|
245
|
+
}
|
|
285
246
|
return;
|
|
286
247
|
}
|
|
287
248
|
|
|
288
|
-
// Cross-source score normalization (
|
|
289
|
-
|
|
290
|
-
|
|
249
|
+
// Cross-source score normalization (paired-path with server.mjs:428).
|
|
250
|
+
// ftsQuery gate prevents normalization when scores are all 0 (no-FTS path).
|
|
251
|
+
const isCrossSource = isCrossSourceMode;
|
|
252
|
+
if (isCrossSource && results.length > 0 && ftsQuery) {
|
|
291
253
|
for (const src of ['obs', 'session', 'prompt']) {
|
|
292
254
|
const srcResults = results.filter(r => r._source === src && r.score !== null && r.score !== undefined);
|
|
293
255
|
if (srcResults.length < 2) continue;
|
|
@@ -318,18 +280,63 @@ function cmdSearch(db, args) {
|
|
|
318
280
|
// else 'relevance' keeps BM25 score order (already sorted)
|
|
319
281
|
|
|
320
282
|
// Trim to limit with offset
|
|
283
|
+
const total = results.length;
|
|
321
284
|
const paged = results.slice(offset, offset + limit);
|
|
322
285
|
|
|
323
286
|
if (paged.length === 0) {
|
|
324
|
-
|
|
287
|
+
if (jsonOutput) {
|
|
288
|
+
out(JSON.stringify({ query, total, returned: 0, offset, limit, results: [] }));
|
|
289
|
+
} else {
|
|
290
|
+
out(`[mem] No results for "${query}" at offset ${offset}`);
|
|
291
|
+
}
|
|
325
292
|
return;
|
|
326
293
|
}
|
|
327
294
|
|
|
295
|
+
// paired-path with server.mjs formatSearchOutput (#8198): "N of M" total when paged < total.
|
|
328
296
|
const showTime = sort === 'time';
|
|
329
297
|
const hasMixed = paged.some(r => r._source === 'session' || r._source === 'prompt');
|
|
330
298
|
// Suppressed when --or was explicit — user already asked for OR, no "fallback" there.
|
|
331
299
|
const fallbackHint = orFallbackFired && !useOr ? ' (relaxed AND→OR)' : '';
|
|
332
|
-
|
|
300
|
+
|
|
301
|
+
if (jsonOutput) {
|
|
302
|
+
const items = paged.map(r => {
|
|
303
|
+
const base = {
|
|
304
|
+
source: r._source,
|
|
305
|
+
id: r.id,
|
|
306
|
+
created_at: r.created_at,
|
|
307
|
+
score: r.score ?? null,
|
|
308
|
+
};
|
|
309
|
+
if (r._source === 'session') {
|
|
310
|
+
return { ...base, request: r.request || null, completed: r.completed || null, project: r.project || null };
|
|
311
|
+
}
|
|
312
|
+
if (r._source === 'prompt') {
|
|
313
|
+
return { ...base, prompt_text: r.prompt_text || null };
|
|
314
|
+
}
|
|
315
|
+
return {
|
|
316
|
+
...base,
|
|
317
|
+
type: r.type,
|
|
318
|
+
title: r.title || r.subtitle || null,
|
|
319
|
+
lesson_learned: r.lesson_learned || null,
|
|
320
|
+
importance: r.importance ?? null,
|
|
321
|
+
superseded: Boolean(r.superseded),
|
|
322
|
+
files_modified: r.files_modified || null,
|
|
323
|
+
};
|
|
324
|
+
});
|
|
325
|
+
out(JSON.stringify({
|
|
326
|
+
query,
|
|
327
|
+
total,
|
|
328
|
+
returned: paged.length,
|
|
329
|
+
offset,
|
|
330
|
+
limit,
|
|
331
|
+
relaxed_and_to_or: orFallbackFired && !useOr,
|
|
332
|
+
mixed_sources: hasMixed,
|
|
333
|
+
results: items,
|
|
334
|
+
}));
|
|
335
|
+
return;
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
const countLabel = total > paged.length ? `${paged.length} of ${total}` : `${paged.length}`;
|
|
339
|
+
out(`[mem] Found ${countLabel} result${paged.length !== 1 ? 's' : ''} for "${query}"${fallbackHint}:${hasMixed ? ' (# observation, S# session, P# prompt)' : ''}`);
|
|
333
340
|
for (const r of paged) {
|
|
334
341
|
const timeStr = showTime && r.created_at_epoch ? ` (${relativeTime(r.created_at_epoch)})` : '';
|
|
335
342
|
if (r._source === 'session') {
|
|
@@ -350,115 +357,6 @@ function cmdSearch(db, args) {
|
|
|
350
357
|
}
|
|
351
358
|
}
|
|
352
359
|
|
|
353
|
-
function searchFts(db, ftsQuery, { type, project, limit, dateFrom, dateTo, minImportance, branch, includeNoise, offset }) {
|
|
354
|
-
const now = Date.now();
|
|
355
|
-
// Current project for boost (2× when no explicit project filter)
|
|
356
|
-
const currentProject = !project ? inferProject() : null;
|
|
357
|
-
|
|
358
|
-
// WHERE clause params (positional ? in SQL order)
|
|
359
|
-
const whereParams = [ftsQuery];
|
|
360
|
-
const wheres = [
|
|
361
|
-
'observations_fts MATCH ?',
|
|
362
|
-
'COALESCE(o.compressed_into, 0) = 0',
|
|
363
|
-
'o.superseded_at IS NULL',
|
|
364
|
-
];
|
|
365
|
-
if (project) { wheres.push('o.project = ?'); whereParams.push(project); }
|
|
366
|
-
if (type) { wheres.push('o.type = ?'); whereParams.push(type); }
|
|
367
|
-
if (dateFrom) { wheres.push('o.created_at_epoch >= ?'); whereParams.push(dateFrom); }
|
|
368
|
-
if (dateTo) { wheres.push('o.created_at_epoch <= ?'); whereParams.push(dateTo); }
|
|
369
|
-
if (minImportance) { wheres.push('COALESCE(o.importance, 1) >= ?'); whereParams.push(minImportance); }
|
|
370
|
-
if (branch) { wheres.push('o.branch = ?'); whereParams.push(branch); }
|
|
371
|
-
// R-1: exclude hook-llm fallback titles ("Modified X", "Worked on X", raw error logs)
|
|
372
|
-
// from default search. They compete for BM25 rank but have ~3% access rate. Mirrors the
|
|
373
|
-
// filter already applied in hook-memory.mjs, hook-context.mjs, and user-prompt-search.js.
|
|
374
|
-
// Use --include-noise to audit them.
|
|
375
|
-
if (!includeNoise) wheres.push(notLowSignalTitleClause('o'));
|
|
376
|
-
|
|
377
|
-
// Param order: SELECT scoring (now, proj, proj) → WHERE (ftsQuery, filters...) → ORDER BY scoring (now, proj, proj) → LIMIT/OFFSET
|
|
378
|
-
const scoreParams = [now, currentProject, currentProject];
|
|
379
|
-
const params = [...scoreParams, ...whereParams, ...scoreParams, limit, offset || 0];
|
|
380
|
-
|
|
381
|
-
// Scoring aligned with server.mjs: BM25 × type-decay × type-quality × project_boost × importance × access_bonus × lesson-boost
|
|
382
|
-
// R-3: lesson_learned presence adds a +0.3 multiplier (empirical: +6.3pp hit-rate lift on bugfix).
|
|
383
|
-
const ftsRows = db.prepare(`
|
|
384
|
-
SELECT o.id, o.type, o.title, o.subtitle, o.created_at, o.created_at_epoch, o.lesson_learned,
|
|
385
|
-
o.files_modified, o.importance,
|
|
386
|
-
${OBS_BM25}
|
|
387
|
-
* (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
|
|
388
|
-
* ${TYPE_QUALITY_CASE}
|
|
389
|
-
* (CASE WHEN ? IS NOT NULL AND o.project = ? THEN 2.0 ELSE 1.0 END)
|
|
390
|
-
* (0.5 + 0.5 * COALESCE(o.importance, 1))
|
|
391
|
-
* (1.0 + 0.1 * LN(1 + COALESCE(o.access_count, 0)))
|
|
392
|
-
* (1.0 + 0.3 * (o.lesson_learned IS NOT NULL)) as score
|
|
393
|
-
FROM observations_fts
|
|
394
|
-
JOIN observations o ON observations_fts.rowid = o.id
|
|
395
|
-
WHERE ${wheres.join(' AND ')}
|
|
396
|
-
ORDER BY ${OBS_BM25}
|
|
397
|
-
* (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
|
|
398
|
-
* ${TYPE_QUALITY_CASE}
|
|
399
|
-
* (CASE WHEN ? IS NOT NULL AND o.project = ? THEN 2.0 ELSE 1.0 END)
|
|
400
|
-
* (0.5 + 0.5 * COALESCE(o.importance, 1))
|
|
401
|
-
* (1.0 + 0.1 * LN(1 + COALESCE(o.access_count, 0)))
|
|
402
|
-
* (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))
|
|
403
|
-
LIMIT ? OFFSET ?
|
|
404
|
-
`).all(...params);
|
|
405
|
-
|
|
406
|
-
// Hybrid: vector search + RRF merge (best-effort)
|
|
407
|
-
try {
|
|
408
|
-
const vocab = getVocabulary(db);
|
|
409
|
-
if (vocab) {
|
|
410
|
-
const queryText = ftsQuery.replace(/['"()]/g, ' ');
|
|
411
|
-
const queryVec = computeVector(queryText, vocab);
|
|
412
|
-
if (queryVec) {
|
|
413
|
-
const vecResults = vectorSearch(db, queryVec, {
|
|
414
|
-
project: project || null,
|
|
415
|
-
vocabVersion: vocab.version,
|
|
416
|
-
limit: VECTOR_SCAN_LIMIT,
|
|
417
|
-
});
|
|
418
|
-
if (vecResults.length > 0 && ftsRows.length > 0) {
|
|
419
|
-
const rrfRanking = rrfMerge(ftsRows, vecResults);
|
|
420
|
-
const rowMap = new Map(ftsRows.map(r => [r.id, r]));
|
|
421
|
-
for (const vr of vecResults) {
|
|
422
|
-
if (!rowMap.has(vr.id)) {
|
|
423
|
-
const obs = db.prepare('SELECT id, type, title, subtitle, created_at, created_at_epoch, lesson_learned, importance, branch, files_modified FROM observations WHERE id = ?').get(vr.id);
|
|
424
|
-
if (obs) {
|
|
425
|
-
// Apply same filters as FTS5 query (aligned with MCP searchObservations)
|
|
426
|
-
if (dateFrom && obs.created_at_epoch < dateFrom) continue;
|
|
427
|
-
if (dateTo && obs.created_at_epoch > dateTo) continue;
|
|
428
|
-
if (minImportance && (obs.importance ?? 1) < minImportance) continue;
|
|
429
|
-
if (branch && obs.branch !== branch) continue;
|
|
430
|
-
// R-1: LOW_SIGNAL filter also applies to vector-side additions (the SQL
|
|
431
|
-
// clause only filtered the FTS5 side) so RRF can't re-admit noise.
|
|
432
|
-
if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) continue;
|
|
433
|
-
rowMap.set(vr.id, obs);
|
|
434
|
-
}
|
|
435
|
-
}
|
|
436
|
-
}
|
|
437
|
-
return rrfRanking
|
|
438
|
-
.filter(rr => rowMap.has(rr.id))
|
|
439
|
-
.map(rr => rowMap.get(rr.id))
|
|
440
|
-
.slice(0, limit);
|
|
441
|
-
} else if (vecResults.length > 0 && ftsRows.length === 0) {
|
|
442
|
-
return vecResults
|
|
443
|
-
.map(vr => db.prepare('SELECT id, type, title, subtitle, created_at, created_at_epoch, lesson_learned, importance, branch FROM observations WHERE id = ?').get(vr.id))
|
|
444
|
-
.filter(obs => {
|
|
445
|
-
if (!obs) return false;
|
|
446
|
-
if (dateFrom && obs.created_at_epoch < dateFrom) return false;
|
|
447
|
-
if (dateTo && obs.created_at_epoch > dateTo) return false;
|
|
448
|
-
if (minImportance && (obs.importance ?? 1) < minImportance) return false;
|
|
449
|
-
if (branch && obs.branch !== branch) return false;
|
|
450
|
-
if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) return false;
|
|
451
|
-
return true;
|
|
452
|
-
})
|
|
453
|
-
.slice(0, limit);
|
|
454
|
-
}
|
|
455
|
-
}
|
|
456
|
-
}
|
|
457
|
-
} catch { /* vector search is best-effort */ }
|
|
458
|
-
|
|
459
|
-
return ftsRows;
|
|
460
|
-
}
|
|
461
|
-
|
|
462
360
|
function cmdRecent(db, args) {
|
|
463
361
|
const { positional, flags } = parseArgs(args);
|
|
464
362
|
const rawArg = positional[0];
|
|
@@ -1985,6 +1883,7 @@ Commands:
|
|
|
1985
1883
|
--sort S Sort: relevance (default), time, importance
|
|
1986
1884
|
--or Use OR instead of AND between search terms
|
|
1987
1885
|
--include-noise Include hook-llm fallback titles ("Modified X", raw error logs)
|
|
1886
|
+
--json Output as JSON: {query,total,returned,offset,limit,results:[…]}
|
|
1988
1887
|
|
|
1989
1888
|
recent [N] Show N most recent observations (default 10)
|
|
1990
1889
|
--project P Filter by project
|
|
@@ -2002,7 +1901,11 @@ Commands:
|
|
|
2002
1901
|
timeline Show observations around an anchor (shows recent if no anchor)
|
|
2003
1902
|
--anchor ID Center on this ID. Accepts N, #N, P#N, or S#N — P#/S# anchors
|
|
2004
1903
|
resolve to the nearest-in-time observation in the same project.
|
|
2005
|
-
--query "text" Find anchor by FTS5 search
|
|
1904
|
+
--query "text" Find anchor by FTS5 search. Ranks by BM25 × time-decay,
|
|
1905
|
+
so multi-term queries surface the BEST topical match
|
|
1906
|
+
(highest term coverage), not the most recent. For
|
|
1907
|
+
"recent activity around X", use 'recent' or
|
|
1908
|
+
'search "X" --sort time' instead.
|
|
2006
1909
|
--before N Show N before anchor (default 5)
|
|
2007
1910
|
--after N Show N after anchor (default 5)
|
|
2008
1911
|
--project P Filter by project
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-mem-lite",
|
|
3
|
-
"version": "2.51.0",
|
|
3
|
+
"version": "2.53.0",
|
|
4
4
|
"description": "Lightweight persistent memory system for Claude Code",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
"mem-cli.mjs",
|
|
27
27
|
"server.mjs",
|
|
28
28
|
"server-internals.mjs",
|
|
29
|
+
"search-engine.mjs",
|
|
29
30
|
"hook.mjs",
|
|
30
31
|
"hook-shared.mjs",
|
|
31
32
|
"hook-llm.mjs",
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
// Shared observation-search engine — the single source of truth for
|
|
2
|
+
// hybrid FTS5 + vector ranking, OR fallback, concept/PRF expansion, and
|
|
3
|
+
// RRF merge. Both server.mjs (mem_search MCP tool) and mem-cli.mjs (search CLI)
|
|
4
|
+
// import these helpers so identical queries return identical candidate sets
|
|
5
|
+
// and rankings. See #8198 / #8212 for the prior paired-path divergence this
|
|
6
|
+
// module exists to eliminate.
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
OBS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE,
|
|
10
|
+
notLowSignalTitleClause, LOW_SIGNAL_TITLE,
|
|
11
|
+
relaxFtsQueryToOr, debugLog, debugCatch,
|
|
12
|
+
} from './utils.mjs';
|
|
13
|
+
import { getVocabulary, computeVector, vectorSearch, rrfMerge } from './tfidf.mjs';
|
|
14
|
+
import { extractPRFTerms, expandQueryByConcepts } from './server-internals.mjs';
|
|
15
|
+
|
|
16
|
+
// Scoring expressions — full adds project boost + access bonus; simple is for
|
|
17
|
+
// expansion paths where boost would over-amplify already-loose matches.
|
|
18
|
+
const FULL_SCORE = `${OBS_BM25}
|
|
19
|
+
* (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
|
|
20
|
+
* ${TYPE_QUALITY_CASE}
|
|
21
|
+
* (CASE WHEN ? IS NOT NULL AND o.project = ? THEN 2.0 ELSE 1.0 END)
|
|
22
|
+
* (0.5 + 0.5 * COALESCE(o.importance, 1))
|
|
23
|
+
* (1.0 + 0.1 * LN(1 + COALESCE(o.access_count, 0)))
|
|
24
|
+
* (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))`;
|
|
25
|
+
|
|
26
|
+
const SIMPLE_SCORE = `${OBS_BM25}
|
|
27
|
+
* (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
|
|
28
|
+
* ${TYPE_QUALITY_CASE}
|
|
29
|
+
* (0.5 + 0.5 * COALESCE(o.importance, 1))
|
|
30
|
+
* (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))`;
|
|
31
|
+
|
|
32
|
+
export function buildObsFtsQuery(scoring, { multiplier, withSnippet, withOffset, includeNoise } = {}) {
|
|
33
|
+
const scoreExpr = scoring === 'full' ? FULL_SCORE : SIMPLE_SCORE;
|
|
34
|
+
const mult = multiplier ? ` * ${multiplier}` : '';
|
|
35
|
+
const lowSignalClause = includeNoise ? '' : `AND ${notLowSignalTitleClause('o')}`;
|
|
36
|
+
return `
|
|
37
|
+
SELECT o.id, o.type, o.title, o.subtitle, o.project, o.created_at, o.created_at_epoch, o.importance,
|
|
38
|
+
o.files_modified, o.lesson_learned,
|
|
39
|
+
${withSnippet ? "snippet(observations_fts, 2, '»', '«', '…', 10) as match_snippet," : ''}
|
|
40
|
+
${scoreExpr}${mult} as score
|
|
41
|
+
FROM observations_fts
|
|
42
|
+
JOIN observations o ON observations_fts.rowid = o.id
|
|
43
|
+
WHERE observations_fts MATCH ?
|
|
44
|
+
AND COALESCE(o.compressed_into, 0) = 0
|
|
45
|
+
AND o.superseded_at IS NULL
|
|
46
|
+
AND (? IS NULL OR o.project = ?)
|
|
47
|
+
AND (? IS NULL OR o.type = ?)
|
|
48
|
+
AND (? IS NULL OR o.created_at_epoch >= ?)
|
|
49
|
+
AND (? IS NULL OR o.created_at_epoch <= ?)
|
|
50
|
+
AND (? IS NULL OR COALESCE(o.importance, 1) >= ?)
|
|
51
|
+
AND (? IS NULL OR o.branch = ?)
|
|
52
|
+
${lowSignalClause}
|
|
53
|
+
ORDER BY score
|
|
54
|
+
LIMIT ?${withOffset ? ' OFFSET ?' : ''}`;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
export function buildObsFtsParams({ now, projectBoost, ftsQuery, args, epochFrom, epochTo, limit, offset }) {
|
|
58
|
+
const params = [now];
|
|
59
|
+
if (projectBoost !== undefined) params.push(projectBoost, projectBoost);
|
|
60
|
+
params.push(
|
|
61
|
+
ftsQuery,
|
|
62
|
+
args.project ?? null, args.project ?? null,
|
|
63
|
+
args.obs_type ?? null, args.obs_type ?? null,
|
|
64
|
+
epochFrom, epochFrom,
|
|
65
|
+
epochTo, epochTo,
|
|
66
|
+
args.importance ?? null, args.importance ?? null,
|
|
67
|
+
args.branch ?? null, args.branch ?? null,
|
|
68
|
+
limit,
|
|
69
|
+
);
|
|
70
|
+
if (offset !== undefined) params.push(offset);
|
|
71
|
+
return params;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
export function ftsRowToResult(r, { scoreMultiplier, snippet } = {}) {
|
|
75
|
+
return {
|
|
76
|
+
source: 'obs', id: r.id, type: r.type, title: r.title, subtitle: r.subtitle,
|
|
77
|
+
project: r.project, date: r.created_at, created_at_epoch: r.created_at_epoch,
|
|
78
|
+
score: scoreMultiplier ? r.score * scoreMultiplier : r.score,
|
|
79
|
+
files_modified: r.files_modified, importance: r.importance, lesson_learned: r.lesson_learned,
|
|
80
|
+
snippet: snippet ? (r.match_snippet || '') : '',
|
|
81
|
+
};
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
function expandObsByConceptCo(db, ctx, now, existingIds, results, includeNoise = false) {
|
|
85
|
+
const { ftsQuery, args, epochFrom, epochTo, limit } = ctx;
|
|
86
|
+
if (results.length >= Math.ceil(limit / 2)) return;
|
|
87
|
+
const expanded = expandQueryByConcepts(db, ftsQuery, args.project);
|
|
88
|
+
if (expanded.length === 0) return;
|
|
89
|
+
const expansionFts = expanded.map(c => `"${c.replace(/"/g, '""')}"`).join(' OR ');
|
|
90
|
+
try {
|
|
91
|
+
const expRows = db.prepare(buildObsFtsQuery('simple', { includeNoise }))
|
|
92
|
+
.all(...buildObsFtsParams({ now, ftsQuery: expansionFts, args, epochFrom, epochTo, limit }));
|
|
93
|
+
for (const r of expRows) {
|
|
94
|
+
if (!existingIds.has(r.id)) {
|
|
95
|
+
existingIds.add(r.id);
|
|
96
|
+
results.push(ftsRowToResult(r, { scoreMultiplier: 0.7 }));
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
} catch (e) { debugLog('WARN', 'search-engine', `concept expansion error: ${e.message}`); }
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
function expandObsByPRF(db, ctx, now, primaryCount, existingIds, results, includeNoise = false) {
|
|
103
|
+
const { ftsQuery, args, epochFrom, epochTo, limit } = ctx;
|
|
104
|
+
if (primaryCount < 3) return;
|
|
105
|
+
const topResults = db.prepare(`
|
|
106
|
+
SELECT o.title, o.narrative FROM observations_fts
|
|
107
|
+
JOIN observations o ON observations_fts.rowid = o.id
|
|
108
|
+
WHERE observations_fts MATCH ? AND COALESCE(o.compressed_into, 0) = 0
|
|
109
|
+
AND (? IS NULL OR o.project = ?)
|
|
110
|
+
ORDER BY ${OBS_BM25}
|
|
111
|
+
LIMIT 8
|
|
112
|
+
`).all(ftsQuery, args.project ?? null, args.project ?? null);
|
|
113
|
+
const prfTerms = extractPRFTerms(topResults, ftsQuery);
|
|
114
|
+
if (prfTerms.length === 0) return;
|
|
115
|
+
const prfFts = prfTerms.map(t => `"${t.replace(/"/g, '""')}"`).join(' OR ');
|
|
116
|
+
try {
|
|
117
|
+
const prfRows = db.prepare(buildObsFtsQuery('simple', { includeNoise }))
|
|
118
|
+
.all(...buildObsFtsParams({ now, ftsQuery: prfFts, args, epochFrom, epochTo, limit }));
|
|
119
|
+
for (const r of prfRows) {
|
|
120
|
+
if (!existingIds.has(r.id)) {
|
|
121
|
+
existingIds.add(r.id);
|
|
122
|
+
results.push(ftsRowToResult(r, { scoreMultiplier: 0.6 }));
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
} catch (e) { debugLog('WARN', 'search-engine', `PRF expansion error: ${e.message}`); }
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
/**
|
|
129
|
+
* Hybrid observation search — single source of truth for FTS + vector + RRF.
|
|
130
|
+
*
|
|
131
|
+
* Pipeline (paired-path with mem-cli.mjs cmdSearch via this module):
|
|
132
|
+
* 1. FTS5 BM25 query (full scoring)
|
|
133
|
+
* 2. OR fallback when AND returned 0 → sets ctx.orFallbackFired
|
|
134
|
+
* 3. Concept co-occurrence expansion (when results sparse)
|
|
135
|
+
* 4. PRF (pseudo-relevance feedback) expansion
|
|
136
|
+
* 5. Vector search + RRF merge (re-ranks all results when both modes have hits)
|
|
137
|
+
* 6. Vector-only fallback (when FTS5 found nothing)
|
|
138
|
+
*
|
|
139
|
+
* @param {Database} db - better-sqlite3 instance
|
|
140
|
+
* @param {object} ctx - { ftsQuery, args, epochFrom, epochTo, perSourceLimit,
|
|
141
|
+
* perSourceOffset, currentProject, limit, orFallbackFired }
|
|
142
|
+
* @returns {Array} list of result objects (mutated ctx may set orFallbackFired)
|
|
143
|
+
*/
|
|
144
|
+
export function searchObservationsHybrid(db, ctx) {
|
|
145
|
+
const { ftsQuery, args, epochFrom, epochTo, perSourceLimit, perSourceOffset, currentProject, limit } = ctx;
|
|
146
|
+
const results = [];
|
|
147
|
+
const includeNoise = args.include_noise === true;
|
|
148
|
+
|
|
149
|
+
if (!ftsQuery) {
|
|
150
|
+
const params = [];
|
|
151
|
+
const wheres = ['COALESCE(compressed_into, 0) = 0', 'superseded_at IS NULL'];
|
|
152
|
+
if (args.project) { wheres.push('project = ?'); params.push(args.project); }
|
|
153
|
+
if (args.obs_type) { wheres.push('type = ?'); params.push(args.obs_type); }
|
|
154
|
+
if (epochFrom !== null) { wheres.push('created_at_epoch >= ?'); params.push(epochFrom); }
|
|
155
|
+
if (epochTo !== null) { wheres.push('created_at_epoch <= ?'); params.push(epochTo); }
|
|
156
|
+
if (args.importance) { wheres.push('COALESCE(importance, 1) >= ?'); params.push(args.importance); }
|
|
157
|
+
if (args.branch) { wheres.push('branch = ?'); params.push(args.branch); }
|
|
158
|
+
const where = `WHERE ${wheres.join(' AND ')}`;
|
|
159
|
+
params.push(perSourceLimit, perSourceOffset);
|
|
160
|
+
const rows = db.prepare(`
|
|
161
|
+
SELECT id, type, title, subtitle, project, created_at, created_at_epoch, files_modified, importance, lesson_learned
|
|
162
|
+
FROM observations ${where}
|
|
163
|
+
ORDER BY created_at_epoch DESC
|
|
164
|
+
LIMIT ? OFFSET ?
|
|
165
|
+
`).all(...params);
|
|
166
|
+
for (const r of rows) {
|
|
167
|
+
results.push({ source: 'obs', id: r.id, type: r.type, title: r.title, subtitle: r.subtitle, project: r.project, date: r.created_at, created_at_epoch: r.created_at_epoch, files_modified: r.files_modified, importance: r.importance, lesson_learned: r.lesson_learned });
|
|
168
|
+
}
|
|
169
|
+
return results;
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
const now = Date.now();
|
|
173
|
+
const projectBoost = args.project ? null : currentProject;
|
|
174
|
+
|
|
175
|
+
const rows = db.prepare(buildObsFtsQuery('full', { withSnippet: true, withOffset: true, includeNoise }))
|
|
176
|
+
.all(...buildObsFtsParams({ now, projectBoost, ftsQuery, args, epochFrom, epochTo, limit: perSourceLimit, offset: perSourceOffset }));
|
|
177
|
+
for (const r of rows) results.push(ftsRowToResult(r, { snippet: true }));
|
|
178
|
+
|
|
179
|
+
// OR fallback — must run BEFORE vector merge so orFallbackFired reflects FTS-only state.
|
|
180
|
+
if (rows.length === 0) {
|
|
181
|
+
const orQuery = relaxFtsQueryToOr(ftsQuery);
|
|
182
|
+
if (orQuery) {
|
|
183
|
+
try {
|
|
184
|
+
const orRows = db.prepare(buildObsFtsQuery('full', { multiplier: 0.5, withSnippet: true, withOffset: true, includeNoise }))
|
|
185
|
+
.all(...buildObsFtsParams({ now, projectBoost, ftsQuery: orQuery, args, epochFrom, epochTo, limit: perSourceLimit, offset: perSourceOffset }));
|
|
186
|
+
if (orRows.length > 0) ctx.orFallbackFired = true;
|
|
187
|
+
for (const r of orRows) results.push(ftsRowToResult(r, { snippet: true }));
|
|
188
|
+
} catch (e) { debugCatch(e, 'searchObservationsHybrid-or-fallback'); }
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
// Two-phase query expansion (only when well below limit)
|
|
193
|
+
if (rows.length > 0 && results.length < Math.ceil(limit / 2)) {
|
|
194
|
+
const existingIds = new Set(results.map(r => r.id));
|
|
195
|
+
expandObsByConceptCo(db, ctx, now, existingIds, results, includeNoise);
|
|
196
|
+
expandObsByPRF(db, ctx, now, rows.length, existingIds, results, includeNoise);
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// Vector search + RRF hybrid merge
|
|
200
|
+
try {
|
|
201
|
+
const vocab = getVocabulary(db);
|
|
202
|
+
if (!vocab) return results;
|
|
203
|
+
const queryText = ftsQuery.replace(/['"()]/g, ' ');
|
|
204
|
+
const queryVec = computeVector(queryText, vocab);
|
|
205
|
+
if (!queryVec) return results;
|
|
206
|
+
const vecResults = vectorSearch(db, queryVec, {
|
|
207
|
+
project: args.project ?? null,
|
|
208
|
+
type: args.obs_type ?? null,
|
|
209
|
+
vocabVersion: vocab.version,
|
|
210
|
+
});
|
|
211
|
+
if (vecResults.length === 0) return results;
|
|
212
|
+
|
|
213
|
+
if (results.length > 0) {
|
|
214
|
+
const rrfRanking = rrfMerge(results, vecResults);
|
|
215
|
+
const resultMap = new Map(results.map(r => [r.id, r]));
|
|
216
|
+
for (const vr of vecResults) {
|
|
217
|
+
if (!resultMap.has(vr.id)) {
|
|
218
|
+
const obs = db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, importance, files_modified, branch, lesson_learned FROM observations WHERE id = ?').get(vr.id);
|
|
219
|
+
if (!obs) continue;
|
|
220
|
+
if (epochFrom !== null && obs.created_at_epoch < epochFrom) continue;
|
|
221
|
+
if (epochTo !== null && obs.created_at_epoch > epochTo) continue;
|
|
222
|
+
if (args.importance && (obs.importance ?? 1) < args.importance) continue;
|
|
223
|
+
if (args.branch && obs.branch !== args.branch) continue;
|
|
224
|
+
if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) continue;
|
|
225
|
+
resultMap.set(vr.id, { source: 'obs', id: obs.id, type: obs.type, title: obs.title, subtitle: obs.subtitle, project: obs.project, date: obs.created_at, importance: obs.importance, files_modified: obs.files_modified, lesson_learned: obs.lesson_learned, snippet: '' });
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
const reordered = rrfRanking
|
|
229
|
+
.filter(rr => resultMap.has(rr.id))
|
|
230
|
+
.map(rr => ({ ...resultMap.get(rr.id), score: -rr.rrfScore }));
|
|
231
|
+
results.length = 0;
|
|
232
|
+
results.push(...reordered);
|
|
233
|
+
} else {
|
|
234
|
+
// FTS5 found nothing but vector found results
|
|
235
|
+
for (const vr of vecResults) {
|
|
236
|
+
const obs = db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, importance, files_modified, branch FROM observations WHERE id = ?').get(vr.id);
|
|
237
|
+
if (!obs) continue;
|
|
238
|
+
if (epochFrom !== null && obs.created_at_epoch < epochFrom) continue;
|
|
239
|
+
if (epochTo !== null && obs.created_at_epoch > epochTo) continue;
|
|
240
|
+
if (args.importance && (obs.importance ?? 1) < args.importance) continue;
|
|
241
|
+
if (args.branch && obs.branch !== args.branch) continue;
|
|
242
|
+
if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) continue;
|
|
243
|
+
results.push({ source: 'obs', id: obs.id, type: obs.type, title: obs.title, subtitle: obs.subtitle, project: obs.project, date: obs.created_at, importance: obs.importance, files_modified: obs.files_modified, lesson_learned: obs.lesson_learned, score: -vr.similarity, snippet: '' });
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
} catch (e) { debugCatch(e, 'searchObservationsHybrid-vector'); }
|
|
247
|
+
|
|
248
|
+
return results;
|
|
249
|
+
}
|
package/server.mjs
CHANGED
|
@@ -5,11 +5,12 @@
|
|
|
5
5
|
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
6
6
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
7
7
|
import { ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
|
|
8
|
-
import { jaccardSimilarity, truncate, typeIcon, sanitizeFtsQuery, relaxFtsQueryToOr, inferProject, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, fmtDate, isoWeekKey, debugLog, debugCatch, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25,
|
|
8
|
+
import { jaccardSimilarity, truncate, typeIcon, sanitizeFtsQuery, relaxFtsQueryToOr, inferProject, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, fmtDate, isoWeekKey, debugLog, debugCatch, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, getCurrentBranch, DEFAULT_DECAY_HALF_LIFE_MS, isPathConfined, notLowSignalTitleClause } from './utils.mjs';
|
|
9
9
|
import { extractCjkLikePatterns, cjkPrecisionOk } from './nlp.mjs';
|
|
10
10
|
import { resolveProject as _resolveProjectShared } from './project-utils.mjs';
|
|
11
11
|
import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
|
|
12
|
-
import { reRankWithContext, markSuperseded,
|
|
12
|
+
import { reRankWithContext, markSuperseded, autoBoostIfNeeded, runIdleCleanup, buildServerInstructions } from './server-internals.mjs';
|
|
13
|
+
import { searchObservationsHybrid } from './search-engine.mjs';
|
|
13
14
|
import { effectiveQuiet } from './hook-shared.mjs';
|
|
14
15
|
import { computeTier, TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
|
|
15
16
|
import { memSearchSchema, memRecentSchema, memTimelineSchema, memGetSchema, memDeleteSchema, memSaveSchema, memStatsSchema, memCompressSchema, memMaintainSchema, memOptimizeSchema, memUpdateSchema, memExportSchema, memRecallSchema, memFtsCheckSchema, memRegistrySchema, memBrowseSchema, memUseSchema, tools as TOOL_DEFS } from './tool-schemas.mjs';
|
|
@@ -28,7 +29,7 @@ import { homedir } from 'os';
|
|
|
28
29
|
import { ensureRegistryDb, upsertResource } from './registry.mjs';
|
|
29
30
|
import { searchResources } from './registry-retriever.mjs';
|
|
30
31
|
import { probeOtherSources as probeIdSources, parseIdToken, bucketIdTokens } from './lib/id-routing.mjs';
|
|
31
|
-
import { getVocabulary, rebuildVocabulary, _resetVocabCache, computeVector, vectorSearch, rrfMerge } from './tfidf.mjs';
|
|
32
|
+
import { getVocabulary, rebuildVocabulary, _resetVocabCache, computeVector } from './tfidf.mjs';
|
|
32
33
|
import { createRequire } from 'module';
|
|
33
34
|
|
|
34
35
|
const require = createRequire(import.meta.url);
|
|
@@ -145,241 +146,13 @@ function safeHandler(fn) {
|
|
|
145
146
|
// TYPE_DECAY_CASE imported from utils.mjs
|
|
146
147
|
|
|
147
148
|
// Score expression variants for FTS5 queries (see Scoring Model Constants above)
|
|
148
|
-
//
|
|
149
|
-
//
|
|
150
|
-
const FULL_SCORE = `${OBS_BM25}
|
|
151
|
-
* (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
|
|
152
|
-
* ${TYPE_QUALITY_CASE}
|
|
153
|
-
* (CASE WHEN ? IS NOT NULL AND o.project = ? THEN 2.0 ELSE 1.0 END)
|
|
154
|
-
* (0.5 + 0.5 * COALESCE(o.importance, 1))
|
|
155
|
-
* (1.0 + 0.1 * LN(1 + COALESCE(o.access_count, 0)))
|
|
156
|
-
* (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))`;
|
|
157
|
-
|
|
158
|
-
const SIMPLE_SCORE = `${OBS_BM25}
|
|
159
|
-
* (1.0 + EXP(-0.693 * (? - MAX(o.created_at_epoch, COALESCE(o.last_accessed_at, o.created_at_epoch))) / ${TYPE_DECAY_CASE}))
|
|
160
|
-
* ${TYPE_QUALITY_CASE}
|
|
161
|
-
* (0.5 + 0.5 * COALESCE(o.importance, 1))
|
|
162
|
-
* (1.0 + 0.3 * (o.lesson_learned IS NOT NULL))`;
|
|
163
|
-
|
|
164
|
-
/**
|
|
165
|
-
* Build an FTS5 observation search query.
|
|
166
|
-
* @param {'full'|'simple'} scoring - full includes project boost + access bonus
|
|
167
|
-
* @param {object} opts - { multiplier, withSnippet, withOffset, includeNoise }
|
|
168
|
-
* includeNoise=true keeps hook-llm fallback titles ("Modified X", "Worked on X", etc.);
|
|
169
|
-
* default false mirrors the filter already applied in hook-memory.mjs / user-prompt-search.js.
|
|
170
|
-
*/
|
|
171
|
-
function buildObsFtsQuery(scoring, { multiplier, withSnippet, withOffset, includeNoise } = {}) {
|
|
172
|
-
const scoreExpr = scoring === 'full' ? FULL_SCORE : SIMPLE_SCORE;
|
|
173
|
-
const mult = multiplier ? ` * ${multiplier}` : '';
|
|
174
|
-
const lowSignalClause = includeNoise ? '' : `AND ${notLowSignalTitleClause('o')}`;
|
|
175
|
-
return `
|
|
176
|
-
SELECT o.id, o.type, o.title, o.subtitle, o.project, o.created_at, o.created_at_epoch, o.importance,
|
|
177
|
-
o.files_modified,
|
|
178
|
-
${withSnippet ? "snippet(observations_fts, 2, '»', '«', '…', 10) as match_snippet," : ''}
|
|
179
|
-
${scoreExpr}${mult} as score
|
|
180
|
-
FROM observations_fts
|
|
181
|
-
JOIN observations o ON observations_fts.rowid = o.id
|
|
182
|
-
WHERE observations_fts MATCH ?
|
|
183
|
-
AND COALESCE(o.compressed_into, 0) = 0
|
|
184
|
-
AND o.superseded_at IS NULL
|
|
185
|
-
AND (? IS NULL OR o.project = ?)
|
|
186
|
-
AND (? IS NULL OR o.type = ?)
|
|
187
|
-
AND (? IS NULL OR o.created_at_epoch >= ?)
|
|
188
|
-
AND (? IS NULL OR o.created_at_epoch <= ?)
|
|
189
|
-
AND (? IS NULL OR COALESCE(o.importance, 1) >= ?)
|
|
190
|
-
AND (? IS NULL OR o.branch = ?)
|
|
191
|
-
${lowSignalClause}
|
|
192
|
-
ORDER BY score
|
|
193
|
-
LIMIT ?${withOffset ? ' OFFSET ?' : ''}`;
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
/** Build params array for an FTS5 observation query. */
|
|
197
|
-
function buildObsFtsParams({ now, projectBoost, ftsQuery, args, epochFrom, epochTo, limit, offset }) {
|
|
198
|
-
const params = [now];
|
|
199
|
-
if (projectBoost !== undefined) params.push(projectBoost, projectBoost); // full scoring only
|
|
200
|
-
params.push(
|
|
201
|
-
ftsQuery,
|
|
202
|
-
args.project ?? null, args.project ?? null,
|
|
203
|
-
args.obs_type ?? null, args.obs_type ?? null,
|
|
204
|
-
epochFrom, epochFrom,
|
|
205
|
-
epochTo, epochTo,
|
|
206
|
-
args.importance ?? null, args.importance ?? null,
|
|
207
|
-
args.branch ?? null, args.branch ?? null,
|
|
208
|
-
limit,
|
|
209
|
-
);
|
|
210
|
-
if (offset !== undefined) params.push(offset);
|
|
211
|
-
return params;
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
/** Map a raw FTS5 row to a result object. */
|
|
215
|
-
function ftsRowToResult(r, { scoreMultiplier, snippet } = {}) {
|
|
216
|
-
return {
|
|
217
|
-
source: 'obs', id: r.id, type: r.type, title: r.title, subtitle: r.subtitle,
|
|
218
|
-
project: r.project, date: r.created_at, created_at_epoch: r.created_at_epoch,
|
|
219
|
-
score: scoreMultiplier ? r.score * scoreMultiplier : r.score,
|
|
220
|
-
files_modified: r.files_modified, importance: r.importance, snippet: snippet ? (r.match_snippet || '') : '',
|
|
221
|
-
};
|
|
222
|
-
}
|
|
149
|
+
// Observation-search core (FTS query/params builders, hybrid pipeline) lives in
|
|
150
|
+
// search-engine.mjs so mem-cli.mjs gets the identical implementation.
|
|
223
151
|
|
|
152
|
+
// Thin wrapper around the shared engine — keeps the existing call sites
|
|
153
|
+
// (searchObservations(ctx)) without ferrying `db` through every layer.
|
|
224
154
|
function searchObservations(ctx) {
|
|
225
|
-
|
|
226
|
-
const results = [];
|
|
227
|
-
// R-1: hide hook-llm fallback titles unless caller explicitly opts in via include_noise=true.
|
|
228
|
-
const includeNoise = args.include_noise === true;
|
|
229
|
-
|
|
230
|
-
if (ftsQuery) {
|
|
231
|
-
const now = Date.now();
|
|
232
|
-
const projectBoost = args.project ? null : currentProject;
|
|
233
|
-
|
|
234
|
-
const rows = db.prepare(buildObsFtsQuery('full', { withSnippet: true, withOffset: true, includeNoise }))
|
|
235
|
-
.all(...buildObsFtsParams({ now, projectBoost, ftsQuery, args, epochFrom, epochTo, limit: perSourceLimit, offset: perSourceOffset }));
|
|
236
|
-
for (const r of rows) results.push(ftsRowToResult(r, { snippet: true }));
|
|
237
|
-
|
|
238
|
-
// OR fallback: when AND query returns 0 results, retry with OR semantics.
|
|
239
|
-
// Sets ctx.orFallbackFired so the top-level formatter can surface a "relaxed
|
|
240
|
-
// AND→OR" hint — without it, callers can't distinguish a strict multi-term
|
|
241
|
-
// match from a partial single-term recovery.
|
|
242
|
-
if (rows.length === 0) {
|
|
243
|
-
const orQuery = relaxFtsQueryToOr(ftsQuery);
|
|
244
|
-
if (orQuery) {
|
|
245
|
-
try {
|
|
246
|
-
const orRows = db.prepare(buildObsFtsQuery('full', { multiplier: 0.5, withSnippet: true, withOffset: true, includeNoise }))
|
|
247
|
-
.all(...buildObsFtsParams({ now, projectBoost, ftsQuery: orQuery, args, epochFrom, epochTo, limit: perSourceLimit, offset: perSourceOffset }));
|
|
248
|
-
if (orRows.length > 0) ctx.orFallbackFired = true;
|
|
249
|
-
for (const r of orRows) results.push(ftsRowToResult(r, { snippet: true }));
|
|
250
|
-
} catch (e) { debugCatch(e, 'searchObservations-or-fallback'); }
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
// Two-phase query expansion for sparse results (only when well below limit)
|
|
255
|
-
if (rows.length > 0 && results.length < Math.ceil(limit / 2)) {
|
|
256
|
-
const existingIds = new Set(results.map(r => r.id));
|
|
257
|
-
expandObsByConceptCo(ctx, now, existingIds, results, includeNoise);
|
|
258
|
-
expandObsByPRF(ctx, now, rows.length, existingIds, results, includeNoise);
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
// Vector search + RRF hybrid merge
|
|
262
|
-
try {
|
|
263
|
-
const vocab = getVocabulary(db);
|
|
264
|
-
if (vocab) {
|
|
265
|
-
const queryText = ftsQuery.replace(/['"()]/g, ' ');
|
|
266
|
-
const queryVec = computeVector(queryText, vocab);
|
|
267
|
-
if (queryVec) {
|
|
268
|
-
const vecResults = vectorSearch(db, queryVec, {
|
|
269
|
-
project: args.project ?? null,
|
|
270
|
-
type: args.obs_type ?? null,
|
|
271
|
-
vocabVersion: vocab.version,
|
|
272
|
-
});
|
|
273
|
-
if (vecResults.length > 0 && results.length > 0) {
|
|
274
|
-
// RRF merge: combine BM25 ranked results with vector ranked results
|
|
275
|
-
const rrfRanking = rrfMerge(results, vecResults);
|
|
276
|
-
const resultMap = new Map(results.map(r => [r.id, r]));
|
|
277
|
-
// Add vector-only results (found by similarity but not by FTS5)
|
|
278
|
-
for (const vr of vecResults) {
|
|
279
|
-
if (!resultMap.has(vr.id)) {
|
|
280
|
-
const obs = db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, importance, files_modified, branch FROM observations WHERE id = ?').get(vr.id);
|
|
281
|
-
if (obs) {
|
|
282
|
-
// Apply same filter constraints as FTS5
|
|
283
|
-
if (epochFrom !== null && obs.created_at_epoch < epochFrom) continue;
|
|
284
|
-
if (epochTo !== null && obs.created_at_epoch > epochTo) continue;
|
|
285
|
-
if (args.importance && (obs.importance ?? 1) < args.importance) continue;
|
|
286
|
-
if (args.branch && obs.branch !== args.branch) continue;
|
|
287
|
-
// R-1: parity with FTS5 WHERE — vector path must also reject LOW_SIGNAL titles
|
|
288
|
-
// so RRF cannot re-admit what the SQL clause excluded.
|
|
289
|
-
if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) continue;
|
|
290
|
-
resultMap.set(vr.id, { source: 'obs', id: obs.id, type: obs.type, title: obs.title, subtitle: obs.subtitle, project: obs.project, date: obs.created_at, importance: obs.importance, files_modified: obs.files_modified, snippet: '' });
|
|
291
|
-
}
|
|
292
|
-
}
|
|
293
|
-
}
|
|
294
|
-
// Re-order by RRF score
|
|
295
|
-
const reordered = rrfRanking
|
|
296
|
-
.filter(rr => resultMap.has(rr.id))
|
|
297
|
-
.map(rr => ({ ...resultMap.get(rr.id), score: -rr.rrfScore })); // negative for BM25-compatible sort
|
|
298
|
-
results.length = 0;
|
|
299
|
-
results.push(...reordered);
|
|
300
|
-
} else if (vecResults.length > 0 && results.length === 0) {
|
|
301
|
-
// FTS5 found nothing but vector found results
|
|
302
|
-
for (const vr of vecResults) {
|
|
303
|
-
const obs = db.prepare('SELECT id, type, title, subtitle, project, created_at, created_at_epoch, importance, files_modified, branch FROM observations WHERE id = ?').get(vr.id);
|
|
304
|
-
if (!obs) continue;
|
|
305
|
-
if (epochFrom !== null && obs.created_at_epoch < epochFrom) continue;
|
|
306
|
-
if (epochTo !== null && obs.created_at_epoch > epochTo) continue;
|
|
307
|
-
if (args.importance && (obs.importance ?? 1) < args.importance) continue;
|
|
308
|
-
if (args.branch && obs.branch !== args.branch) continue;
|
|
309
|
-
if (!includeNoise && obs.title && LOW_SIGNAL_TITLE.test(obs.title)) continue;
|
|
310
|
-
results.push({ source: 'obs', id: obs.id, type: obs.type, title: obs.title, subtitle: obs.subtitle, project: obs.project, date: obs.created_at, importance: obs.importance, files_modified: obs.files_modified, score: -vr.similarity, snippet: '' });
|
|
311
|
-
}
|
|
312
|
-
}
|
|
313
|
-
}
|
|
314
|
-
}
|
|
315
|
-
} catch (e) { debugCatch(e, 'searchObservations-vector'); }
|
|
316
|
-
} else {
|
|
317
|
-
const params = [];
|
|
318
|
-
const wheres = ['COALESCE(compressed_into, 0) = 0', 'superseded_at IS NULL'];
|
|
319
|
-
if (args.project) { wheres.push('project = ?'); params.push(args.project); }
|
|
320
|
-
if (args.obs_type) { wheres.push('type = ?'); params.push(args.obs_type); }
|
|
321
|
-
if (epochFrom !== null) { wheres.push('created_at_epoch >= ?'); params.push(epochFrom); }
|
|
322
|
-
if (epochTo !== null) { wheres.push('created_at_epoch <= ?'); params.push(epochTo); }
|
|
323
|
-
if (args.importance) { wheres.push('COALESCE(importance, 1) >= ?'); params.push(args.importance); }
|
|
324
|
-
if (args.branch) { wheres.push('branch = ?'); params.push(args.branch); }
|
|
325
|
-
const where = `WHERE ${wheres.join(' AND ')}`;
|
|
326
|
-
params.push(perSourceLimit, perSourceOffset);
|
|
327
|
-
const rows = db.prepare(`
|
|
328
|
-
SELECT id, type, title, subtitle, project, created_at, created_at_epoch, files_modified, importance
|
|
329
|
-
FROM observations ${where}
|
|
330
|
-
ORDER BY created_at_epoch DESC
|
|
331
|
-
LIMIT ? OFFSET ?
|
|
332
|
-
`).all(...params);
|
|
333
|
-
for (const r of rows) {
|
|
334
|
-
results.push({ source: 'obs', id: r.id, type: r.type, title: r.title, subtitle: r.subtitle, project: r.project, date: r.created_at, created_at_epoch: r.created_at_epoch, files_modified: r.files_modified, importance: r.importance });
|
|
335
|
-
}
|
|
336
|
-
}
|
|
337
|
-
|
|
338
|
-
return results;
|
|
339
|
-
}
|
|
340
|
-
|
|
341
|
-
function expandObsByConceptCo(ctx, now, existingIds, results, includeNoise = false) {
|
|
342
|
-
const { ftsQuery, args, epochFrom, epochTo, limit } = ctx;
|
|
343
|
-
if (results.length >= Math.ceil(limit / 2)) return;
|
|
344
|
-
const expanded = expandQueryByConcepts(db, ftsQuery, args.project);
|
|
345
|
-
if (expanded.length === 0) return;
|
|
346
|
-
const expansionFts = expanded.map(c => `"${c.replace(/"/g, '""')}"`).join(' OR ');
|
|
347
|
-
try {
|
|
348
|
-
const expRows = db.prepare(buildObsFtsQuery('simple', { includeNoise }))
|
|
349
|
-
.all(...buildObsFtsParams({ now, ftsQuery: expansionFts, args, epochFrom, epochTo, limit }));
|
|
350
|
-
for (const r of expRows) {
|
|
351
|
-
if (!existingIds.has(r.id)) {
|
|
352
|
-
existingIds.add(r.id);
|
|
353
|
-
results.push(ftsRowToResult(r, { scoreMultiplier: 0.7 }));
|
|
354
|
-
}
|
|
355
|
-
}
|
|
356
|
-
} catch (e) { debugLog('WARN', 'mem_search', `concept expansion error: ${e.message}`); }
|
|
357
|
-
}
|
|
358
|
-
|
|
359
|
-
function expandObsByPRF(ctx, now, primaryCount, existingIds, results, includeNoise = false) {
|
|
360
|
-
const { ftsQuery, args, epochFrom, epochTo, limit } = ctx;
|
|
361
|
-
if (primaryCount < 3) return;
|
|
362
|
-
const topResults = db.prepare(`
|
|
363
|
-
SELECT o.title, o.narrative FROM observations_fts
|
|
364
|
-
JOIN observations o ON observations_fts.rowid = o.id
|
|
365
|
-
WHERE observations_fts MATCH ? AND COALESCE(o.compressed_into, 0) = 0
|
|
366
|
-
AND (? IS NULL OR o.project = ?)
|
|
367
|
-
ORDER BY ${OBS_BM25}
|
|
368
|
-
LIMIT 8
|
|
369
|
-
`).all(ftsQuery, args.project ?? null, args.project ?? null);
|
|
370
|
-
const prfTerms = extractPRFTerms(topResults, ftsQuery);
|
|
371
|
-
if (prfTerms.length === 0) return;
|
|
372
|
-
const prfFts = prfTerms.map(t => `"${t.replace(/"/g, '""')}"`).join(' OR ');
|
|
373
|
-
try {
|
|
374
|
-
const prfRows = db.prepare(buildObsFtsQuery('simple', { includeNoise }))
|
|
375
|
-
.all(...buildObsFtsParams({ now, ftsQuery: prfFts, args, epochFrom, epochTo, limit }));
|
|
376
|
-
for (const r of prfRows) {
|
|
377
|
-
if (!existingIds.has(r.id)) {
|
|
378
|
-
existingIds.add(r.id);
|
|
379
|
-
results.push(ftsRowToResult(r, { scoreMultiplier: 0.6 }));
|
|
380
|
-
}
|
|
381
|
-
}
|
|
382
|
-
} catch (e) { debugLog('WARN', 'mem_search', `PRF expansion error: ${e.message}`); }
|
|
155
|
+
return searchObservationsHybrid(db, ctx);
|
|
383
156
|
}
|
|
384
157
|
|
|
385
158
|
function searchSessions(ctx) {
|
package/source-files.mjs
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
export const SOURCE_FILES = [
|
|
8
8
|
// Entry points and top-level modules
|
|
9
|
-
'cli.mjs', 'server.mjs', 'server-internals.mjs', 'tool-schemas.mjs',
|
|
9
|
+
'cli.mjs', 'server.mjs', 'server-internals.mjs', 'search-engine.mjs', 'tool-schemas.mjs',
|
|
10
10
|
'hook.mjs', 'hook-shared.mjs', 'hook-llm.mjs', 'hook-memory.mjs', 'skip-tools.mjs',
|
|
11
11
|
'hook-semaphore.mjs', 'hook-episode.mjs', 'hook-context.mjs', 'hook-handoff.mjs',
|
|
12
12
|
'hook-update.mjs', 'hook-optimize.mjs',
|
package/tool-schemas.mjs
CHANGED
|
@@ -318,11 +318,13 @@ export const tools = [
|
|
|
318
318
|
name: 'mem_timeline',
|
|
319
319
|
description:
|
|
320
320
|
'Show observations before and after an anchor point (by ID or by FTS query).\n' +
|
|
321
|
+
'Query-anchor ranks by BM25 × time-decay → BEST topical match, not most recent.\n' +
|
|
321
322
|
'\n' +
|
|
322
323
|
'DO NOT use when:\n' +
|
|
323
324
|
' - You only want one record (use mem_get)\n' +
|
|
324
325
|
' - You have no anchor in mind and are just browsing (use mem_recent or mem_browse)\n' +
|
|
325
326
|
' - The sequence is obvious from commit history (use git log)\n' +
|
|
327
|
+
' - You want "recent activity around X" (use mem_recent or mem_search sort="time")\n' +
|
|
326
328
|
'\n' +
|
|
327
329
|
'USE when:\n' +
|
|
328
330
|
' - Reconstructing what led up to / followed a specific bug or decision\n' +
|