claude-mem-lite 3.6.0 → 3.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +21 -13
- package/README.zh-CN.md +1 -1
- package/deep-search.mjs +26 -4
- package/hook-update.mjs +17 -1
- package/hook.mjs +403 -373
- package/install.mjs +691 -639
- package/lib/atomic-write.mjs +38 -0
- package/lib/doctor-benchmark.mjs +4 -4
- package/lib/err-sampler.mjs +7 -3
- package/lib/lesson-idents.mjs +32 -0
- package/lib/proc-lock.mjs +112 -0
- package/lib/search-core.mjs +272 -16
- package/mem-cli.mjs +56 -175
- package/package.json +6 -2
- package/schema.mjs +119 -65
- package/scoring-sql.mjs +25 -0
- package/scripts/post-tool-recall.js +71 -0
- package/scripts/pre-tool-recall.js +27 -2
- package/search-engine.mjs +1 -1
- package/{server-internals.mjs → search-scoring.mjs} +6 -2
- package/server.mjs +85 -295
- package/source-files.mjs +11 -1
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// lib/atomic-write.mjs — crash-safe file writes with optional one-time backup.
|
|
2
|
+
//
|
|
3
|
+
// Why: several write paths mutate user-global config that, if torn or clobbered,
|
|
4
|
+
// breaks the user outside the plugin's control — most acutely ~/.claude.json
|
|
5
|
+
// (the WHOLE Claude Code config) in hook-update's post-update MCP dedup, and
|
|
6
|
+
// ~/.claude/settings.json in install. A plain writeFileSync can leave a
|
|
7
|
+
// half-written file on crash, and a fixed ".tmp" name races concurrent writers.
|
|
8
|
+
// This writes to a pid-unique temp then renames (atomic on POSIX), and can drop
|
|
9
|
+
// a one-time ".bak" so a logic bug in the caller's merge is recoverable.
|
|
10
|
+
|
|
11
|
+
import { writeFileSync, renameSync, existsSync, copyFileSync, mkdirSync, unlinkSync } from 'node:fs';
import { dirname } from 'node:path';
|
|
13
|
+
|
|
14
|
+
/**
 * Atomically write `data` to `filePath` (temp + rename). Optionally back up the
 * existing file once to `<filePath>.bak` before the first overwrite.
 *
 * The payload is written to a pid-unique temp file in the SAME directory and
 * then renamed over the target, so readers see either the old or the new
 * content, never a half-written file. If the write or the rename throws, the
 * temp file is removed (best-effort) before the error is rethrown, so a failed
 * write does not leave `.tmp-<pid>` litter next to the user's config.
 *
 * @param {string} filePath Destination path; its parent directory is created if missing.
 * @param {string} data Content to write.
 * @param {object} [opts]
 * @param {boolean} [opts.backup=false] Create <filePath>.bak if absent and the
 *   target exists, before writing. Only the first call creates it, so the backup
 *   preserves the last-known-good rather than being overwritten each run.
 * @throws Rethrows any error raised while creating the directory, writing the
 *   temp file, or renaming it into place. Backup failures are ignored.
 */
export function atomicWriteFileSync(filePath, data, { backup = false } = {}) {
  const dir = dirname(filePath);
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });

  const bakPath = `${filePath}.bak`;
  if (backup && existsSync(filePath) && !existsSync(bakPath)) {
    try { copyFileSync(filePath, bakPath); } catch { /* best-effort backup */ }
  }

  // pid-unique temp: a fixed ".tmp" name lets two concurrent installs clobber
  // each other's temp mid-write. Same-dir temp keeps the rename atomic (no
  // cross-device move).
  const tmp = `${filePath}.tmp-${process.pid}`;
  try {
    writeFileSync(tmp, data);
    renameSync(tmp, filePath);
  } catch (err) {
    // Do not leak the temp file when the write (e.g. disk full) or the rename
    // fails; the caller still sees the original error.
    try { unlinkSync(tmp); } catch { /* temp never created or already gone */ }
    throw err;
  }
}
|
package/lib/doctor-benchmark.mjs
CHANGED
|
@@ -16,7 +16,7 @@ import { sanitizeFtsQuery, OBS_BM25 } from '../utils.mjs';
|
|
|
16
16
|
|
|
17
17
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
18
18
|
const SERVER_PATH = join(__dirname, '..', 'server.mjs');
|
|
19
|
-
const
|
|
19
|
+
const SEARCH_SCORING_PATH = join(__dirname, '..', 'search-scoring.mjs');
|
|
20
20
|
const BENCHMARK_VERSION = '1';
|
|
21
21
|
|
|
22
22
|
function extractStringArrayBody(body) {
|
|
@@ -40,7 +40,7 @@ function extractStringArrayBody(body) {
|
|
|
40
40
|
* 1. template literal in server.mjs: instructions: `...`
|
|
41
41
|
* 2. array-join in server.mjs: instructions: [ '...', '...' ].join('\n')
|
|
42
42
|
* 3. (v2.31.3+) builder call in server.mjs referencing INSTRUCTIONS_BASE +
|
|
43
|
-
* INSTRUCTIONS_VERBOSE arrays in
|
|
43
|
+
* INSTRUCTIONS_VERBOSE arrays in search-scoring.mjs. Measured at the
|
|
44
44
|
* verbose form — this is the cost-per-turn baseline the benchmark tracks.
|
|
45
45
|
* Returns '' if no shape matches (caller treats byte count as 0).
|
|
46
46
|
*/
|
|
@@ -56,10 +56,10 @@ function readMcpInstructions() {
|
|
|
56
56
|
if (arr) return extractStringArrayBody(arr[1]).join('\n');
|
|
57
57
|
|
|
58
58
|
// Form 3: buildServerInstructions() — reconstruct verbose form from
|
|
59
|
-
//
|
|
59
|
+
// search-scoring.mjs INSTRUCTIONS_BASE + INSTRUCTIONS_VERBOSE arrays.
|
|
60
60
|
if (/instructions:\s*buildServerInstructions\(/.test(src)) {
|
|
61
61
|
let internals;
|
|
62
|
-
try { internals = readFileSync(
|
|
62
|
+
try { internals = readFileSync(SEARCH_SCORING_PATH, 'utf8'); } catch { return ''; }
|
|
63
63
|
const base = internals.match(/INSTRUCTIONS_BASE\s*=\s*\[([\s\S]*?)\];/);
|
|
64
64
|
const verbose = internals.match(/INSTRUCTIONS_VERBOSE\s*=\s*\[([\s\S]*?)\];/);
|
|
65
65
|
const parts = [];
|
package/lib/err-sampler.mjs
CHANGED
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
|
|
18
18
|
import { appendFileSync, mkdirSync, existsSync } from 'fs';
|
|
19
19
|
import { join } from 'path';
|
|
20
|
+
import { scrubSecrets } from '../secret-scrub.mjs';
|
|
20
21
|
|
|
21
22
|
const DAY_MS = 86400000;
|
|
22
23
|
|
|
@@ -47,11 +48,14 @@ export function maybeSampleError(e, ctx, dbDir) {
|
|
|
47
48
|
const errDir = join(dbDir, 'errors');
|
|
48
49
|
if (!existsSync(errDir)) mkdirSync(errDir, { recursive: true, mode: 0o700 });
|
|
49
50
|
|
|
51
|
+
// Scrub BEFORE truncating: a connection string / Authorization header / 401
|
|
52
|
+
// body can ride along in an error message or stack frame. Scrub the full
|
|
53
|
+
// string first so a secret straddling the slice boundary is still caught.
|
|
50
54
|
const line = JSON.stringify({
|
|
51
55
|
ts: new Date().toISOString(),
|
|
52
|
-
ctx: String(ctx || '').slice(0, 120),
|
|
53
|
-
msg: String(e?.message ?? e ?? '').slice(0, 500),
|
|
54
|
-
stack: typeof e?.stack === 'string' ? e.stack.split('\n').slice(0, 6).join('\n') : undefined,
|
|
56
|
+
ctx: scrubSecrets(String(ctx || '')).slice(0, 120),
|
|
57
|
+
msg: scrubSecrets(String(e?.message ?? e ?? '')).slice(0, 500),
|
|
58
|
+
stack: typeof e?.stack === 'string' ? scrubSecrets(e.stack.split('\n').slice(0, 6).join('\n')) : undefined,
|
|
55
59
|
}) + '\n';
|
|
56
60
|
|
|
57
61
|
appendFileSync(join(errDir, `${today()}.jsonl`), line, { mode: 0o600 });
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
// lib/lesson-idents.mjs — pure, zero-dependency extractor of code identifiers a
|
|
2
|
+
// lesson names, for the bind-salience PostToolUse "dropped a required reference"
|
|
3
|
+
// check (scripts/post-tool-recall.js). Imported by hot standalone hooks → NO
|
|
4
|
+
// heavy imports (lesson #8447): regex over a string only.
|
|
5
|
+
//
|
|
6
|
+
// Identifier shapes: backtick-quoted, camelCase, snake_case, length >= MIN_LEN.
|
|
7
|
+
// These name functions/columns a lesson tells you to keep (recoverChildrenOf,
|
|
8
|
+
// compressed_into). Plain prose ("recover", "delete") is intentionally excluded.
|
|
9
|
+
|
|
10
|
+
// Shortest identifier worth reporting; anything shorter is dropped.
const MIN_LEN = 5;
// `name` wrapped in backticks (any identifier shape).
const BACKTICK = /`([A-Za-z_][A-Za-z0-9_]*)`/g;
// lowerCamelCase: lowercase start, at least one uppercase letter later.
const CAMEL = /\b([a-z][a-z0-9]*[A-Z][A-Za-z0-9]*)\b/g;
// snake_case: lowercase segments joined by at least one underscore.
const SNAKE = /\b([a-z][a-z0-9]*(?:_[a-z0-9]+)+)\b/g;

/**
 * Collect the distinct code identifiers named in `text`.
 *
 * Patterns are applied in the order backtick → camelCase → snake_case, and the
 * result keeps first-seen order with duplicates removed. Identifiers shorter
 * than MIN_LEN are ignored.
 *
 * @param {string} text Lesson text to scan; falsy input yields [].
 * @returns {string[]} Unique identifiers in discovery order.
 */
export function extractIdents(text) {
  if (!text) return [];
  const found = new Set();
  [BACKTICK, CAMEL, SNAKE].forEach((pattern) => {
    for (const [, ident] of text.matchAll(pattern)) {
      if (ident.length < MIN_LEN) continue;
      found.add(ident);
    }
  });
  return Array.from(found);
}
|
|
24
|
+
|
|
25
|
+
/**
 * Narrow the identifiers a lesson names down to those that literally occur in
 * the pre-edit file content. This lets the PostToolUse check report "you
 * removed X" without ever reporting "you didn't add X that was never here".
 *
 * @param {string} lessonText Lesson text to extract identifiers from.
 * @param {string} content Pre-edit file content; falsy content yields [].
 * @returns {string[]} Identifiers from the lesson found as substrings of `content`.
 */
export function presentIdents(lessonText, content) {
  if (!content) return [];
  const kept = [];
  for (const ident of extractIdents(lessonText)) {
    if (content.includes(ident)) kept.push(ident);
  }
  return kept;
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
// lib/proc-lock.mjs — best-effort inter-process advisory lock (O_EXCL file).
|
|
2
|
+
//
|
|
3
|
+
// Why: multiple Claude Code sessions can fire SessionStart hooks (and their
|
|
4
|
+
// self-heal / auto-update write paths) at the same instant. install(),
|
|
5
|
+
// install.mjs repair, and hook-update.installExtractedRelease all rename source
|
|
6
|
+
// files into the live install dir; two of them interleaving produces a torn /
|
|
7
|
+
// mixed-version install (server vN + hook vN+1). The launcher's 6h cooldown
|
|
8
|
+
// only RATE-LIMITS re-spawns — it is not mutual exclusion (two processes can
|
|
9
|
+
// both observe "no recent attempt" and both spawn). This gives the write paths
|
|
10
|
+
// a real cross-process gate.
|
|
11
|
+
//
|
|
12
|
+
// Semantics: acquireLock() atomically creates the lock file with O_EXCL. If it
|
|
13
|
+
// already exists it is stolen only when STALE (holder's timestamp older than
|
|
14
|
+
// staleMs, or the recorded pid is provably dead on this host). A live holder →
|
|
15
|
+
// acquire returns null and the caller no-ops (someone else is already doing the
|
|
16
|
+
// write). Release unlinks the file. Crash-safe: a crashed holder's lock ages
|
|
17
|
+
// out via staleMs so the next session reclaims it.
|
|
18
|
+
|
|
19
|
+
import { writeFileSync, readFileSync, unlinkSync, mkdirSync } from 'node:fs';
|
|
20
|
+
import { dirname } from 'node:path';
|
|
21
|
+
|
|
22
|
+
// 5 min: comfortably longer than any install/update write phase (npm install in
|
|
23
|
+
// staging is timeout-capped at 60s) but short enough that a crashed holder does
|
|
24
|
+
// not block self-heal for long.
|
|
25
|
+
const DEFAULT_STALE_MS = 5 * 60 * 1000;
|
|
26
|
+
|
|
27
|
+
/**
 * Report whether a process with the given pid currently exists on this host.
 *
 * @param {*} pid Candidate process id; anything that is not a positive number is "dead".
 * @returns {boolean} true if the process exists (including when it belongs to
 *   another user), false otherwise.
 */
function pidAlive(pid) {
  if (typeof pid !== 'number') return false;
  if (pid <= 0) return false;

  let alive = true;
  try {
    // Signal 0 only probes for existence; nothing is delivered to the target.
    process.kill(pid, 0);
  } catch (err) {
    // EPERM: the process is there but owned by someone else — still alive for
    // our purposes. Any other failure counts as "not running".
    alive = err.code === 'EPERM';
  }
  return alive;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Decide whether an existing lock file may be reclaimed.
 *
 * A lock is stale when any of the following holds:
 *   - the file cannot be read or parsed as JSON;
 *   - the payload is not an object carrying a numeric `ts` (acquireLock always
 *     writes one, so anything else is corrupt or foreign — without this rule a
 *     payload such as `{}` would never age out and would block every future
 *     acquire);
 *   - `ts` is older than `staleMs`;
 *   - the recorded `pid` is a number and that process no longer exists.
 *
 * @param {string} lockPath Path of the lock file to inspect.
 * @param {number} staleMs Maximum age in milliseconds before the lock is stale.
 * @param {() => number} now Clock returning the current time in milliseconds.
 * @returns {boolean} true if the lock can be stolen, false if a live peer holds it.
 */
function isStale(lockPath, staleMs, now) {
  try {
    const holder = JSON.parse(readFileSync(lockPath, 'utf8'));
    if (holder === null || typeof holder !== 'object') return true;

    const { pid, ts } = holder;
    // No usable timestamp → the lock could never expire by age; reclaim it.
    if (typeof ts !== 'number') return true;
    if (now() - ts > staleMs) return true;
    // Same-host fast reclaim: holder pid is gone. Cross-host (shared homedir)
    // the pid is meaningless, but ts-staleness above still reclaims it.
    if (typeof pid === 'number' && !pidAlive(pid)) return true;
    return false;
  } catch {
    return true; // unparseable / unreadable lock → treat as stale and reclaim
  }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Build the idempotent release() function for a lock that was just created at
 * `lockPath`.
 *
 * The lock payload is snapshotted at creation time. On release the file is
 * removed only if it still holds that payload: when this holder overran the
 * stale window and a peer stole and re-created the lock, the file now belongs
 * to the peer and unlinking it would let a third process in. If the snapshot
 * could not be taken, release falls back to an unconditional unlink.
 *
 * @param {string} lockPath Path of the lock file owned by the caller.
 * @returns {() => void} release function; calls after the first are no-ops.
 */
function makeRelease(lockPath) {
  let owned = null;
  try { owned = readFileSync(lockPath, 'utf8'); } catch { /* no snapshot → unconditional unlink on release */ }

  let released = false;
  return function release() {
    if (released) return;
    released = true;
    if (owned !== null) {
      let current = owned;
      try { current = readFileSync(lockPath, 'utf8'); } catch { /* unreadable / gone — let unlink decide */ }
      if (current !== owned) return; // stolen and re-created by a peer — not ours to remove
    }
    try { unlinkSync(lockPath); } catch { /* already gone — fine */ }
  };
}
|
|
60
|
+
|
|
61
|
+
/**
 * Non-blocking attempt to take the advisory lock at `lockPath`.
 *
 * The lock file is created exclusively (flag 'wx') with a `{ pid, ts }` JSON
 * payload. When the file already exists it is taken over only if isStale()
 * says so; otherwise, and on any other filesystem error, the call fails closed.
 *
 * @param {string} lockPath Absolute path to the lock file.
 * @param {object} [opts]
 * @param {number} [opts.staleMs] Age after which a held lock is stolen.
 * @param {() => number} [opts.now] Clock injection seam (tests).
 * @returns {(() => void)|null} A release() fn, or null if the lock was not obtained.
 */
export function acquireLock(lockPath, { staleMs = DEFAULT_STALE_MS, now = Date.now } = {}) {
  try {
    mkdirSync(dirname(lockPath), { recursive: true });
  } catch { /* best-effort */ }

  const payload = JSON.stringify({ pid: process.pid, ts: now() });
  // Exclusive create: throws EEXIST when any file is already at lockPath.
  const createExclusive = () => {
    writeFileSync(lockPath, payload, { flag: 'wx' });
    return makeRelease(lockPath);
  };

  let firstError;
  try {
    return createExclusive();
  } catch (err) {
    firstError = err;
  }

  // Anything but "already exists" is a permission / fs problem → fail closed.
  if (firstError.code !== 'EEXIST') return null;
  // A live peer holds it → caller no-ops.
  if (!isStale(lockPath, staleMs, now)) return null;

  // Stale: remove and retry once; losing that race to another stealer → null.
  try { unlinkSync(lockPath); } catch { /* raced */ }
  try {
    return createExclusive();
  } catch {
    return null;
  }
}
|
|
88
|
+
|
|
89
|
+
/**
 * Execute the synchronous `fn` under the lock at `lockPath`. The lock is
 * released whether `fn` returns or throws. When the lock cannot be obtained,
 * `fn` is not called.
 *
 * @param {string} lockPath Lock file path, forwarded to acquireLock.
 * @param {() => any} fn Work to run while holding the lock.
 * @param {object} [opts] Options forwarded to acquireLock.
 * @returns {{acquired: boolean, result?: any}}
 */
export function withLock(lockPath, fn, opts) {
  const unlock = acquireLock(lockPath, opts);
  if (unlock) {
    let result;
    try {
      result = fn();
    } finally {
      unlock();
    }
    return { acquired: true, result };
  }
  return { acquired: false };
}
|
|
102
|
+
|
|
103
|
+
/**
 * Promise-aware counterpart of withLock: the lock stays held until the value
 * returned by `fn` has settled, then it is released (on success or rejection).
 * When the lock cannot be obtained, `fn` is not called.
 *
 * @param {string} lockPath Lock file path, forwarded to acquireLock.
 * @param {() => any} fn Work to run (and await) while holding the lock.
 * @param {object} [opts] Options forwarded to acquireLock.
 * @returns {Promise<{acquired: boolean, result?: any}>}
 */
export async function withLockAsync(lockPath, fn, opts) {
  const unlock = acquireLock(lockPath, opts);
  if (unlock) {
    let result;
    try {
      result = await fn();
    } finally {
      unlock();
    }
    return { acquired: true, result };
  }
  return { acquired: false };
}
|
package/lib/search-core.mjs
CHANGED
|
@@ -1,26 +1,33 @@
|
|
|
1
|
-
// Shared cross-source search core
|
|
2
|
-
// normalization / sort / pagination math).
|
|
1
|
+
// Shared cross-source search core for cmdSearch (CLI) and mem_search (MCP).
|
|
3
2
|
//
|
|
4
|
-
//
|
|
5
|
-
//
|
|
6
|
-
//
|
|
7
|
-
//
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
// fields), error-message wording, and output rendering.
|
|
3
|
+
// coreRunSearchPipeline (below) is the SINGLE orchestration body — deep /
|
|
4
|
+
// auto-escalation → per-source query (obs hybrid + sessions + prompts) →
|
|
5
|
+
// cross-source normalize+sort → context re-rank + supersede → tier filter →
|
|
6
|
+
// user sort → count+paginate. The two ~180-line bodies that cmdSearch and
|
|
7
|
+
// runSearchPipeline used to keep hand-synced via "paired-path" comments are
|
|
8
|
+
// gone (audit P1-2); each surface is now a thin adapter that parses/validates
|
|
9
|
+
// in and renders out. Cross-surface equivalence is enforced by
|
|
10
|
+
// tests/search-parity.test.mjs, not by comments.
|
|
13
11
|
//
|
|
14
|
-
//
|
|
15
|
-
//
|
|
16
|
-
//
|
|
17
|
-
//
|
|
12
|
+
// Surfaces legitimately differ on a few policy points; each is an explicit opt
|
|
13
|
+
// on coreRunSearchPipeline (obsTypeFallback / crossSourceEpochSortNoFts /
|
|
14
|
+
// rerankPolicy / recentListingNoFts / tolerateMissingFts / tierPosition …) so
|
|
15
|
+
// behavior is strictly preserved and any future convergence is a deliberate
|
|
16
|
+
// change, not an accident. Notable preserved asymmetries:
|
|
17
|
+
// • CLI forces source=observations for --type/--tier/--importance/--branch;
|
|
18
|
+
// MCP only forces it for obs_type. (effectiveSource is computed per-adapter.)
|
|
18
19
|
// • CLI warns on inverted --from/--to ranges; MCP does not.
|
|
19
|
-
// • CLI
|
|
20
|
+
// • CLI tolerates missing session/prompt FTS (pre-FTS legacy DBs); MCP does not.
|
|
21
|
+
// • MCP lists-recent-by-type on a 0-match obs_type query; CLI does not (#8217).
|
|
22
|
+
//
|
|
23
|
+
// Result rows use one canonical `source` key; session/prompt rows carry dual
|
|
24
|
+
// keys (date=created_at, text=prompt_text, session=content_session_id) so each
|
|
25
|
+
// surface's renderer reads its own field names off a single row shape.
|
|
20
26
|
|
|
21
27
|
import { sanitizeFtsQuery, relaxFtsQueryToOr, SESS_BM25, DEFAULT_DECAY_HALF_LIFE_MS } from '../utils.mjs';
|
|
22
28
|
import { cjkPrecisionOk, extractCjkLikePatterns } from '../nlp.mjs';
|
|
23
29
|
import { computeTier } from '../tier.mjs';
|
|
30
|
+
import { countSearchTotal, attachBodyTokens } from '../search-engine.mjs';
|
|
24
31
|
|
|
25
32
|
/** Sanitize a user query to FTS5 syntax; optionally force OR semantics. */
|
|
26
33
|
export function buildSearchFtsQuery(query, { or = false } = {}) {
|
|
@@ -198,3 +205,252 @@ export function applyTierFilter(db, results, { tier, sourceKey, currentProject }
|
|
|
198
205
|
return full && computeTier(full, tierCtx) === tier;
|
|
199
206
|
});
|
|
200
207
|
}
|
|
208
|
+
|
|
209
|
+
/**
 * Shared count + paginate + enrich tail for cmdSearch (CLI) and
 * runSearchPipeline (MCP): takes the merged, scored candidate list and returns
 * one page plus the population size.
 *
 * `total`
 *   - non-deep: `results` is the over-fetched candidate pool, not the
 *     population, so the real MATCH+filter count is re-derived through
 *     countSearchTotal and then clamped to at least results.length so
 *     vector/concept-augmented obs rows are never undercounted (#8217/#8638).
 *   - deep (explicit or auto-escalated): the fused variant set already in
 *     `results` IS the population; countSearchTotal would count the ORIGINAL
 *     query's FTS matches instead, which is wrong for deep (F1).
 *
 * `page`
 *   Always `results.slice(offset, offset + limit)` — `offset` is applied exactly
 *   once, here. The page rows are then passed to attachBodyTokens for the ~Nt
 *   fetch-cost hint.
 *
 * @param {object} db Database handle, forwarded to countSearchTotal / attachBodyTokens.
 * @param {object[]} results Merged, sorted candidate rows.
 * @param {object} opts Paging window, deep flag and the filters used for the query.
 * @returns {{ total: number, page: object[] }}
 */
export function finalizeSearchPage(db, results, {
  isDeep, offset, limit, effectiveSource, ftsQuery, orFallbackFired,
  project = null, obsType = null, importance = null, branch = null,
  epochFrom = null, epochTo = null, includeNoise = false,
}) {
  let total = results.length;
  if (!isDeep) {
    const counted = countSearchTotal(db, {
      effectiveSource: effectiveSource || null,
      ftsQuery,
      obsFtsQuery: effectiveObsFtsQuery(ftsQuery, orFallbackFired),
      args: {
        project: project || null,
        obs_type: obsType || null,
        importance: importance || null,
        branch: branch || null,
      },
      project: project || null,
      epochFrom,
      epochTo,
      includeNoise,
    });
    total = Math.max(counted, results.length);
  }

  const end = offset + limit;
  const page = results.slice(offset, end);
  attachBodyTokens(db, page);
  return { total, page };
}
|
|
250
|
+
|
|
251
|
+
/**
 * Unified cross-source search orchestrator — single source of truth for the CLI
 * (cmdSearch) and MCP (mem_search) search bodies: deep / auto-escalation →
 * per-source query (obs hybrid + sessions + prompts) → cross-source
 * normalize+sort → context re-rank + supersede → tier post-filter → user sort →
 * count+paginate. The two surfaces legitimately differ on a handful of policy
 * points; each is a named opt so a future "fix" is a deliberate contract change,
 * not an accident (see the per-opt comments).
 *
 * The core does NO flag/schema parsing, NO stdout/stderr, NO formatting — adapters
 * validate+parse on the way in and render on the way out. Session/prompt rows
 * carry dual keys (`date`=created_at, `text`=prompt_text, `session`=content_session_id)
 * so both renderers read their own field names off one canonical row.
 *
 * #8743: `db` comes ONLY from `ctx.db` — there is no module-global fallback, so a
 * per-source leg can never silently query the wrong database.
 *
 * @param {object} ctx Injected dependencies: `db`, `currentProject`, `env`
 *   (defaults to process.env), the engine functions `searchObservationsHybrid`,
 *   `deepSearch`, `shouldEscalateToDeep`, `autoDeepLlmReady`, `reRankWithContext`,
 *   `markSuperseded`, and the optional `llm` / `rerankLlm` handles.
 * @param {object} opts Already-parsed query (`query`, `ftsQuery`), source and
 *   filter selection, paging (`limit`, `offset`), `sort`, `tier`, `deepMode`
 *   ('normal' by default; 'deep' and 'auto' are the other values handled here),
 *   plus the surface-policy flags documented inline below.
 * @returns {Promise<{ page:object[], total:number, preFinalizeCount:number, isDeep:boolean,
 *   escalated:boolean, escalatedObsCount:number, variants:object[]|null,
 *   reranked:boolean, orFallbackFired:boolean, effectiveSource:string|null, ftsQuery:string }>}
 */
export async function coreRunSearchPipeline(ctx, opts) {
  const {
    db, currentProject = null, env = process.env,
    searchObservationsHybrid, deepSearch, shouldEscalateToDeep,
    autoDeepLlmReady, reRankWithContext, markSuperseded,
    llm = null, rerankLlm = undefined,
  } = ctx;
  const {
    query, ftsQuery, effectiveSource = null, deepMode = 'normal', rerank = false,
    limit, offset, project = null, obsType = null, importance = null, branch = null,
    includeNoise = false, epochFrom = null, epochTo = null, sort = 'relevance', tier = null,
    // ── surface policy (strict behavior-preservation; the two surfaces differ) ──
    obsTypeFallback = false,            // A5: list-recent-by-type when 0 matches — MCP true, CLI false (#8217 removed it from CLI)
    crossSourceEpochSortNoFts = false,  // A3: epoch-sort the cross-source set when no ftsQuery — MCP true, CLI false
    rerankPolicy = 'mcp',               // A4: re-rank/supersede gate + re-sort condition — 'mcp' | 'cli'
    rerankProject = null,               // reRankWithContext project — MCP currentProject, CLI project||inferProject()
    recentListingNoFts = false,         // session/prompt recent-listing when no ftsQuery (explicit --source) — MCP true, CLI false
    tolerateMissingFts = false,         // wrap session/prompt FTS in try/catch for pre-FTS legacy DBs — CLI true, MCP false
    tierPosition = 'late',              // tier filter vs re-rank ordering — MCP 'late' (after re-rank), CLI 'early' (in obs block)
    tierProject = null,                 // applyTierFilter project — MCP project||currentProject, CLI project||inferProject()
  } = opts;

  // Per-source fetch window derived from the requested page; the final page
  // slice itself happens in finalizeSearchPage at the end.
  const { perSourceLimit, perSourceOffset } = computePerSourceWindow(limit, offset);
  // No explicit source → every source leg below runs and results are merged.
  const isCrossSource = !effectiveSource;
  // Single accumulator that every stage below appends to or rewrites in place.
  const results = [];
  let orFallbackFired = false;
  let deepVariants = null;
  let deepReranked = false;
  let isDeep = deepMode === 'deep';
  let escalated = false;
  let escalatedObsCount = 0;

  // Context handed to the hybrid observation search. Its `orFallbackFired`
  // field is read back after the call, so the engine is expected to set it.
  const obsCtx = {
    db, ftsQuery,
    args: { project, obs_type: obsType, importance, branch, include_noise: includeNoise },
    epochFrom, epochTo, perSourceLimit, perSourceOffset, currentProject, limit,
    orFallbackFired: false,
  };

  // Runs deepSearch and records its variants / reranked flag on the outer
  // state. When ctx.llm is supplied it is passed through and `auto` is not
  // forwarded; otherwise `auto` is forwarded. In both shapes the `rerank`
  // option is turned off for auto-escalated runs.
  const runDeep = async ({ auto = false } = {}) => {
    const ds = await deepSearch(db, {
      query, project, type: obsType, importance, branch, includeNoise,
      epochFrom, epochTo, limit: perSourceLimit, currentProject,
    }, llm ? { llm, rerank: rerank && !auto, rerankLlm } : { auto, rerank: rerank && !auto, rerankLlm });
    deepVariants = ds.variants;
    deepReranked = ds.reranked;
    return ds.results;
  };

  // ── Observations (hybrid engine; deep / auto-escalation; obs rows already carry source:'obs') ──
  if (!effectiveSource || effectiveSource === 'observations') {
    if (deepMode === 'deep') {
      results.push(...await runDeep());
    } else {
      results.push(...searchObservationsHybrid(db, obsCtx));
      if (obsCtx.orFallbackFired) orFallbackFired = true;
      const obsCountBefore = results.filter((r) => r.source === 'obs').length;
      // Auto mode: escalate only when the LLM is ready AND the heuristic asks
      // for it. On escalation the hybrid rows are discarded and replaced by the
      // deep rows; the pre-escalation obs count is kept for reporting.
      if (deepMode === 'auto' && autoDeepLlmReady(env, llm) &&
          shouldEscalateToDeep(results.filter((r) => r.source === 'obs'), obsCtx, { db, project })) {
        const deepRows = await runDeep({ auto: true });
        results.length = 0;
        results.push(...deepRows);
        isDeep = true;
        escalated = true;
        escalatedObsCount = obsCountBefore;
      }
    }
  }

  // ── Tier post-filter, CLI position: obs-only (tier forces observations), before re-rank ──
  if (tier && tierPosition === 'early') {
    const filtered = applyTierFilter(db, results, { tier, sourceKey: 'source', currentProject: tierProject });
    // Rewrite in place so `results` keeps its identity for the later stages.
    results.length = 0; results.push(...filtered);
  }

  // ── Sessions (FTS via shared helper; optional recent-listing when no ftsQuery) ──
  // Skipped entirely once the search is deep (explicit or auto-escalated).
  if ((!effectiveSource || effectiveSource === 'sessions') && !isDeep) {
    const pushSessions = () => {
      if (ftsQuery) {
        // projectBoost is only passed when no explicit project filter is set.
        const rows = searchSessionsFts(db, { ftsQuery, project, projectBoost: project ? null : currentProject, epochFrom, epochTo, perSourceLimit, perSourceOffset });
        for (const r of rows) results.push({ ...r, source: 'session', date: r.created_at });
      } else if (recentListingNoFts && effectiveSource === 'sessions') {
        // No query text + explicit sessions source: newest-first listing with
        // the same project / date-range filters, all bound as parameters.
        const params = []; const wheres = [];
        if (project) { wheres.push('project = ?'); params.push(project); }
        if (epochFrom !== null) { wheres.push('created_at_epoch >= ?'); params.push(epochFrom); }
        if (epochTo !== null) { wheres.push('created_at_epoch <= ?'); params.push(epochTo); }
        const where = wheres.length ? `WHERE ${wheres.join(' AND ')}` : '';
        params.push(perSourceLimit, perSourceOffset);
        const rows = db.prepare(`
          SELECT id, request, completed, project, created_at, created_at_epoch
          FROM session_summaries ${where}
          ORDER BY created_at_epoch DESC
          LIMIT ? OFFSET ?
        `).all(...params);
        for (const r of rows) results.push({ ...r, source: 'session', date: r.created_at });
      }
    };
    // With tolerateMissingFts ANY error from this leg is swallowed and the leg
    // simply contributes no rows; without it the error propagates.
    if (tolerateMissingFts) { try { pushSessions(); } catch { /* session FTS may not exist in older DBs */ } } else pushSessions();
  }

  // ── Prompts (FTS via shared helper incl. CJK gate; optional recent-listing) ──
  // Skipped entirely once the search is deep (explicit or auto-escalated).
  if ((!effectiveSource || effectiveSource === 'prompts') && !isDeep) {
    const pushPrompts = () => {
      if (ftsQuery) {
        const rows = searchPromptsFts(db, { query, ftsQuery, project, epochFrom, epochTo, perSourceLimit, perSourceOffset });
        // Dual keys: renderer-specific aliases next to the raw column names.
        for (const r of rows) results.push({ ...r, source: 'prompt', date: r.created_at, text: r.prompt_text, session: r.content_session_id });
      } else if (recentListingNoFts && effectiveSource === 'prompts') {
        // No query text + explicit prompts source: newest-first listing. The
        // project filter is applied via the joined sdk_sessions row.
        const params = []; const wheres = [];
        if (project) { wheres.push('s.project = ?'); params.push(project); }
        if (epochFrom !== null) { wheres.push('p.created_at_epoch >= ?'); params.push(epochFrom); }
        if (epochTo !== null) { wheres.push('p.created_at_epoch <= ?'); params.push(epochTo); }
        const where = wheres.length ? `WHERE ${wheres.join(' AND ')}` : '';
        params.push(perSourceLimit, perSourceOffset);
        const rows = db.prepare(`
          SELECT p.id, p.prompt_text, p.content_session_id, p.created_at, p.created_at_epoch
          FROM user_prompts p
          JOIN sdk_sessions s ON p.content_session_id = s.content_session_id
          ${where}
          ORDER BY p.created_at_epoch DESC
          LIMIT ? OFFSET ?
        `).all(...params);
        for (const r of rows) results.push({ ...r, source: 'prompt', date: r.created_at, text: r.prompt_text, session: r.content_session_id });
      }
    };
    // Same error policy as the sessions leg above.
    if (tolerateMissingFts) { try { pushPrompts(); } catch { /* prompt FTS may not exist in older DBs */ } } else pushPrompts();
  }

  // ── Type-list fallback (MCP): obs_type set + 0 matches → list recent of that type ──
  // Only fires when NOTHING was collected from any leg. Rows get score 0 and an
  // empty snippet; compressed and superseded observations are excluded.
  // NOTE(review): this query is capped with `limit` (not perSourceLimit) while
  // the page is later sliced at `offset`, so with offset > 0 the fallback page
  // looks like it can come back short or empty — confirm whether intended.
  // NOTE(review): `branch` and `includeNoise` are not applied here although the
  // main observation query receives them — confirm whether intended.
  if (obsTypeFallback && results.length === 0 && obsType) {
    const typeWheres = ['COALESCE(compressed_into, 0) = 0', 'superseded_at IS NULL', 'type = ?'];
    const typeParams = [obsType];
    if (project) { typeWheres.push('project = ?'); typeParams.push(project); }
    if (epochFrom !== null) { typeWheres.push('created_at_epoch >= ?'); typeParams.push(epochFrom); }
    if (epochTo !== null) { typeWheres.push('created_at_epoch <= ?'); typeParams.push(epochTo); }
    if (importance) { typeWheres.push('COALESCE(importance, 1) >= ?'); typeParams.push(importance); }
    typeParams.push(limit);
    const typeRows = db.prepare(`
      SELECT id, type, title, subtitle, project, created_at, importance, files_modified
      FROM observations WHERE ${typeWheres.join(' AND ')}
      ORDER BY created_at_epoch DESC LIMIT ?
    `).all(...typeParams);
    for (const r of typeRows) results.push({ source: 'obs', id: r.id, type: r.type, title: r.title, subtitle: r.subtitle, project: r.project, date: r.created_at, importance: r.importance, files_modified: r.files_modified, score: 0, snippet: '' });
  }

  // ── Cross-source normalize + sort ──
  // Scores are normalized across sources only when there was a text query.
  if (isCrossSource && results.length > 0 && ftsQuery) normalizeCrossSourceScores(results, 'source');
  if (isCrossSource && results.length > 0) {
    // Ascending score order (missing score counts as 0) — presumably lower is
    // better, bm25-style; confirm against the scoring helpers.
    if (ftsQuery) results.sort((a, b) => (a.score ?? 0) - (b.score ?? 0));
    // No text query: newest first, but only for surfaces that opted in.
    else if (crossSourceEpochSortNoFts) results.sort((a, b) => (b.created_at_epoch ?? 0) - (a.created_at_epoch ?? 0));
  }

  // ── Context re-rank + superseded marking (markSuperseded is pure stale-tagging) ──
  const hasObs = results.some((r) => r.source === 'obs');
  // 'mcp' additionally requires a text query or a deep search; 'cli' (any other
  // value) only requires that observation rows are present.
  const rerankGate = rerankPolicy === 'mcp' ? ((ftsQuery || isDeep) && hasObs) : hasObs;
  if (rerankGate) {
    const obsResults = results.filter((r) => r.source === 'obs');
    // Never re-rank on top of a deep search that already reranked its rows.
    const doReRank = rerankPolicy === 'mcp' ? (ftsQuery && !deepReranked) : !deepReranked;
    if (doReRank) reRankWithContext(db, obsResults, rerankProject);
    markSuperseded(obsResults);
    // obsResults shares row objects with `results`, so a re-sort of the merged
    // list is what makes any score changes affect the final order.
    const doReSort = rerankPolicy === 'mcp' ? (ftsQuery && !deepReranked) : isCrossSource;
    if (doReSort) results.sort((a, b) => (a.score ?? 0) - (b.score ?? 0));
  }

  // ── Tier post-filter, MCP position: after re-rank, on the merged set ──
  if (tier && tierPosition === 'late') {
    const filtered = applyTierFilter(db, results, { tier, sourceKey: 'source', currentProject: tierProject });
    results.length = 0; results.push(...filtered);
  }

  // ── User-requested sort (after relevance scoring) ──
  applyUserSort(results, sort);

  // ── Count + paginate + ~Nt enrich. preFinalizeCount lets the CLI adapter
  //    distinguish "nothing matched" from "this page is empty" (its two messages). ──
  const preFinalizeCount = results.length;
  const { total, page } = finalizeSearchPage(db, results, {
    isDeep, offset, limit, effectiveSource, ftsQuery, orFallbackFired,
    project, obsType, importance, branch, epochFrom, epochTo, includeNoise,
  });

  return {
    page, total, preFinalizeCount, isDeep, escalated, escalatedObsCount,
    variants: deepVariants, reranked: deepReranked, orFallbackFired, effectiveSource, ftsQuery,
  };
}
|