claude-mem-lite 3.1.2 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@
10
10
  "plugins": [
11
11
  {
12
12
  "name": "claude-mem-lite",
13
- "version": "3.1.2",
13
+ "version": "3.3.0",
14
14
  "source": "./",
15
15
  "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark)."
16
16
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "3.1.2",
3
+ "version": "3.3.0",
4
4
  "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
5
5
  "author": {
6
6
  "name": "sdsrss"
package/deep-search.mjs CHANGED
@@ -38,6 +38,107 @@ import { RRF_K } from './tfidf.mjs';
38
38
  // original + up to 3 rewrites (keyword / concept-expansion / HyDE).
39
39
  export const MAX_VARIANTS = 4;
40
40
 
41
+ // ─── Auto-escalation (opt-in adaptive deep search) ──────────────────────────
42
+ // Result-count floor below which a normal search is "weak" enough to auto-escalate
43
+ // to deepSearch. Calibrated against the deep-search benchmark fixtures; 3 is the
44
+ // starting point (vocabulary-mismatch misses typically return 0-2 obs rows).
45
+ export const AUTO_DEEP_MIN_RESULTS = 3;
46
+
47
+ // Corpus-size floor below which auto-escalation is skipped entirely.
48
+ // A near-empty store can't be rescued by HyDE/multi-query, so the Haiku call
49
+ // would be wasted. Project-scoped when a project arg is provided, else global.
50
+ export const AUTO_DEEP_MIN_CORPUS = 10;
51
+
52
/**
 * Corpus-size gate for auto-escalation.
 *
 * Counts the rows of `observations` that are still live
 * (`superseded_at IS NULL` and `COALESCE(compressed_into, 0) = 0`), limited to
 * `project` when one is supplied, and reports whether that count reaches
 * `min`. A near-empty store cannot be rescued by HyDE/multi-query, so callers
 * use a `false` verdict to skip the escalation (and its LLM call).
 *
 * @param {object} db handle exposing `prepare(sql).get(...params)`
 * @param {string|null|undefined} project falsy → count across every project
 * @param {number} [min=AUTO_DEEP_MIN_CORPUS] minimum number of live rows
 * @returns {boolean} true when count >= min; also true when the query throws
 *   (fail open — an error must never suppress escalation)
 */
export function hasEscalatableCorpus(db, project, min = AUTO_DEEP_MIN_CORPUS) {
  const conditions = ['superseded_at IS NULL', 'COALESCE(compressed_into, 0) = 0'];
  const bindings = [];
  if (project) {
    conditions.push('project = ?');
    bindings.push(project);
  }
  const sql = `SELECT COUNT(*) AS c FROM observations WHERE ${conditions.join(' AND ')}`;
  try {
    // A missing row or missing `c` column is treated as an empty corpus.
    const liveCount = db.prepare(sql).get(...bindings)?.c ?? 0;
    return liveCount >= min;
  } catch {
    return true; // fail open
  }
}
68
+
69
/**
 * Decide whether AUTO escalation has a usable LLM behind it.
 *
 * Ready when any of these holds, checked in order:
 *   1. an llm was injected by the caller (stub or real, e.g. in tests);
 *   2. `ANTHROPIC_API_KEY` or `OPENROUTER_API_KEY` is set in `env`;
 *   3. the claude-CLI fallback has not been switched off (D#40: default-on
 *      for CLI-auth users).
 *
 * The CLI kill switch is `CLAUDE_MEM_AUTO_DEEP_CLI`; after trimming and
 * lower-casing, the values `0`, `false`, `no` and `off` disable it. Any other
 * value — including unset — leaves the CLI path enabled.
 *
 * @param {object} [env=process.env]
 * @param {Function|undefined} [injectedLlm]
 * @returns {boolean}
 */
export function autoDeepLlmReady(env = process.env, injectedLlm) {
  // An injected llm wins before env is consulted at all.
  if (injectedLlm) return true;

  const hasProviderKey = Boolean(env.ANTHROPIC_API_KEY || env.OPENROUTER_API_KEY);
  if (hasProviderKey) return true;

  // No provider key: only the CLI path remains, gated by its kill switch.
  const killSwitch = String(env.CLAUDE_MEM_AUTO_DEEP_CLI ?? '').trim().toLowerCase();
  const disabledSpellings = ['0', 'false', 'no', 'off'];
  return !disabledSpellings.includes(killSwitch);
}
90
+
91
/**
 * Zero-LLM heuristic: are the normal-search results weak enough to warrant
 * auto-escalating to deepSearch? Reads ONLY the rows already in hand and never
 * calls an LLM, so the decision itself is free — only a positive verdict costs
 * an LLM call (the escalation).
 *
 * Weak means: fewer results than the `minResults` floor. A `results` value
 * that is not an array is counted as zero results.
 *
 * NOTE: ctx.orFallbackFired is intentionally NOT an escalation trigger. It
 * fires on a SUCCESSFUL AND→OR recovery — when the fallback returns enough
 * results the query is working, not weak. Escalating on a successful recovery
 * would (a) discard good results already in hand, (b) fire an unwanted LLM
 * call, and (c) erase the AND→OR hint that surfaces to the caller. The
 * genuinely-weak vocabulary-mismatch case (AND fails, OR also fails) is still
 * caught on count alone.
 *
 * @param {Array} results normal-search rows
 * @param {object} [_ctx] hybrid search ctx; ignored, accepted only so existing
 *   callers that pass it keep working
 * @param {object} [opts]
 * @param {number} [opts.minResults=AUTO_DEEP_MIN_RESULTS]
 * @returns {boolean} true when `results.length < minResults`
 */
export function shouldEscalateToDeep(results, _ctx, { minResults = AUTO_DEEP_MIN_RESULTS } = {}) {
  const count = Array.isArray(results) ? results.length : 0;
  return count < minResults;
}
120
+
121
/**
 * Resolve the tri-state deep mode. Precedence: explicit value > env flag >
 * per-surface default.
 *
 * The env flag is `CLAUDE_MEM_AUTO_DEEP`. It is trimmed and lower-cased, then
 * matched against the same disable spellings the `CLAUDE_MEM_AUTO_DEEP_CLI`
 * kill switch accepts (`0`, `false`, `no`, `off`) and their enable
 * counterparts (`1`, `true`, `yes`, `on`), so that e.g.
 * `CLAUDE_MEM_AUTO_DEEP=false` is not silently ignored. Unset or unrecognized
 * values fall through to the surface default.
 *
 * @param {boolean|undefined} explicitDeep caller's deep value (undefined = not passed)
 * @param {object} opts
 * @param {'mcp'|'cli'} opts.surface
 * @param {object} [opts.env=process.env]
 * @returns {'deep'|'auto'|'normal'}
 *   'deep'   — force deepSearch
 *   'auto'   — run normal search, escalate if weak
 *   'normal' — run normal search, never escalate
 */
export function resolveDeepMode(explicitDeep, { surface, env = process.env } = {}) {
  // Only strict booleans count as explicit; anything else means "not passed".
  if (explicitDeep === true) return 'deep';
  if (explicitDeep === false) return 'normal';

  const flag = String(env.CLAUDE_MEM_AUTO_DEEP ?? '').trim().toLowerCase();
  if (['0', 'false', 'no', 'off'].includes(flag)) return 'normal';
  if (['1', 'true', 'yes', 'on'].includes(flag)) return 'auto';

  // No usable flag: MCP defaults to auto-escalation, every other surface to normal.
  return surface === 'mcp' ? 'auto' : 'normal';
}
141
+
41
142
  // Echoes hook-llm.mjs MEMORY_INPUT_GUARD (kept inline rather than imported so
42
143
  // this module — and the tests that import it — never pull in hook-llm's
43
144
  // native-heavy chain; see #8729). Same security intent: the query is untrusted.
@@ -97,12 +198,75 @@ export function assembleVariants(query, parsed, { max = MAX_VARIANTS } = {}) {
97
198
  return out;
98
199
  }
99
200
 
100
- // Default provider: pulled in lazily so importing deep-search.mjs (e.g. in tests
101
- // with an injected llm) never loads the LLM client. callModelJSON returns parsed
102
- // JSON or null, and never throws.
201
+ // ─── Auto-escalation safety machinery (D#40) ─────────────────────────────────
202
+ // The AUTO path can fire on every weak search across the long-lived MCP server,
203
+ // so it must be fail-fast (short timeout, no retry), throttled (bound bursts),
204
+ // and cached (skip repeat rewrites). The EXPLICIT deep=true path stays patient.
205
+
206
// Fail-fast budget for the auto path; the auto path does not retry.
export const AUTO_DEEP_TIMEOUT_MS = 5000;
// Minimum gap between auto LLM rewrites, per process (bounds the spawn rate).
export const AUTO_DEEP_THROTTLE_MS = 3000;
// Entry cap for the query → variants cache; the oldest entry is evicted past it.
const REWRITE_CACHE_MAX = 256;

// Timestamp (Date.now()) of the last auto LLM call that was actually made.
let _lastAutoLlmAt = 0;
// normalized query → variants (string[]). Only successful rewrites are stored.
// Map insertion order doubles as recency order: least-recently-used key first.
const _rewriteCache = new Map();

/** Reset auto-path throttle + cache. Test-only; production state is per-process. */
export function _resetAutoDeepState() {
  _rewriteCache.clear();
  _lastAutoLlmAt = 0;
}

/**
 * Look up cached variants and mark the entry most-recently-used.
 * @param {string} key normalized query
 * @returns {string[]|null} a copy of the cached variants, or null on a miss
 */
function cacheGet(key) {
  if (!_rewriteCache.has(key)) return null;
  const stored = _rewriteCache.get(key);
  // Re-insert so the key moves to the most-recent end of the Map.
  _rewriteCache.delete(key);
  _rewriteCache.set(key, stored);
  return stored.slice(); // copy: callers may mutate their result freely
}

/**
 * Store a copy of `variants` under `key` as the most-recently-used entry,
 * evicting the least-recently-used entry when the cap is exceeded.
 * @param {string} key normalized query
 * @param {string[]} variants
 */
function cacheSet(key, variants) {
  _rewriteCache.delete(key); // no-op when absent; otherwise refreshes recency
  _rewriteCache.set(key, variants.slice());
  if (_rewriteCache.size <= REWRITE_CACHE_MAX) return;
  const oldestKey = _rewriteCache.keys().next().value;
  _rewriteCache.delete(oldestKey);
}
229
+
230
/**
 * Wrap an llm so it fires at most once per `intervalMs` per process. Exported
 * for tests.
 *
 * A call that arrives inside the interval is not forwarded: the wrapper
 * resolves null instead, which lets rewriteQuery degrade to the baseline
 * query. The throttle clock is module-global, so every wrapper built by this
 * function shares it.
 *
 * The clock is stamped BEFORE the wrapped llm runs, so it advances on every
 * call that is actually made — success or failure. That is deliberate: the
 * throttle bounds the subprocess SPAWN RATE, and a failed spawn still costs a
 * subprocess plus its timeout. Gating only on success would let a persistently
 * broken provider spawn on every weak search. The interval is kept short so a
 * single failure suppresses escalation only briefly.
 *
 * @param {(prompt: object) => Promise<object|null>} llm
 * @param {object} [opts]
 * @param {number} [opts.intervalMs=AUTO_DEEP_THROTTLE_MS]
 * @returns {(prompt: object) => Promise<object|null>}
 */
export function makeThrottled(llm, { intervalMs = AUTO_DEEP_THROTTLE_MS } = {}) {
  const throttled = async (prompt) => {
    const calledAt = Date.now();
    const sinceLastCall = calledAt - _lastAutoLlmAt;
    if (sinceLastCall < intervalMs) return null;
    _lastAutoLlmAt = calledAt;
    return llm(prompt);
  };
  return throttled;
}
250
+
251
// Issue a single rewrite request through callModelJSONAsync, the fully-async
// dispatcher in haiku-client.mjs. Per D#40 every CLI invocation on that path —
// the cli-mode primary AND the fallback taken after a keyed provider fails —
// is non-blocking, so an MCP request handler does not stall the event loop
// during a provider outage. The client module is imported lazily so that
// tests injecting their own llm never load it.
async function callRewriteLLM(prompt, { timeout }) {
  const { callModelJSONAsync } = await import('./haiku-client.mjs');
  const requestOptions = { timeout, maxTokens: 400 };
  return callModelJSONAsync(prompt, 'haiku', requestOptions);
}
260
+
261
+ // Default (explicit deep=true) provider: patient timeout, no throttle/cache.
103
262
  async function defaultLLM(prompt) {
104
- const { callModelJSON } = await import('./haiku-client.mjs');
105
- return callModelJSON(prompt, 'haiku', { timeout: 12000, maxTokens: 400 });
263
+ return callRewriteLLM(prompt, { timeout: 12000 });
264
+ }
265
+
266
+ // Auto-path provider: fail-fast timeout + throttle. Built fresh per deepSearch
267
+ // call; the throttle clock it reads is module-global (per-process).
268
+ function makeAutoLlm() {
269
+ return makeThrottled((prompt) => callRewriteLLM(prompt, { timeout: AUTO_DEEP_TIMEOUT_MS }));
106
270
  }
107
271
 
108
272
  /**
@@ -113,11 +277,17 @@ async function defaultLLM(prompt) {
113
277
  * @param {object} [opts]
114
278
  * @param {(prompt: object) => Promise<object|null>} [opts.llm]
115
279
  * @param {number} [opts.retries=1]
280
+ * @param {boolean} [opts.cache=false] memoize successful rewrites (auto path)
116
281
  * @returns {Promise<string[]>}
117
282
  */
118
- export async function rewriteQuery(query, { llm = defaultLLM, retries = 1 } = {}) {
283
+ export async function rewriteQuery(query, { llm = defaultLLM, retries = 1, cache = false } = {}) {
119
284
  const original = String(query ?? '').trim();
120
285
  if (!original) return [];
286
+ const key = original.toLowerCase();
287
+ if (cache) {
288
+ const hit = cacheGet(key);
289
+ if (hit) return hit; // process-lifetime memo of a prior successful rewrite
290
+ }
121
291
  const prompt = buildRewritePrompt(original);
122
292
  for (let attempt = 0; attempt <= retries; attempt++) {
123
293
  let parsed;
@@ -127,7 +297,10 @@ export async function rewriteQuery(query, { llm = defaultLLM, retries = 1 } = {}
127
297
  parsed = null;
128
298
  }
129
299
  const variants = assembleVariants(original, parsed);
130
- if (variants.length > 1) return variants; // got at least one real rewrite
300
+ if (variants.length > 1) { // got at least one real rewrite
301
+ if (cache) cacheSet(key, variants); // cache successes only — failures retry next time
302
+ return variants;
303
+ }
131
304
  }
132
305
  return [original]; // robust floor — single-query == baseline
133
306
  }
@@ -212,13 +385,24 @@ function defaultSearchFn(db, query, params) {
212
385
  * @param {(prompt:object)=>Promise<object|null>} [deps.llm]
213
386
  * @param {(db:Database, query:string, params:object)=>Array} [deps.searchFn]
214
387
  * @param {number} [deps.rrfK=RRF_K]
388
+ * @param {boolean} [deps.auto=false] use the fail-fast/throttled/cached auto provider
215
389
  * @returns {Promise<{results: Array, variants: string[]}>}
216
390
  */
217
- export async function deepSearch(db, params, { llm = defaultLLM, searchFn = defaultSearchFn, rrfK = RRF_K } = {}) {
391
+ export async function deepSearch(db, params, { llm, searchFn = defaultSearchFn, rrfK = RRF_K, auto = false } = {}) {
218
392
  const query = String(params?.query ?? '').trim();
219
393
  if (!query) return { results: [], variants: [] };
220
394
 
221
- const variants = await rewriteQuery(query, { llm });
395
+ // No injected llm: EXPLICIT deep=true uses the patient defaultLLM; the AUTO
396
+ // path uses a fail-fast + throttled provider with no retry and a process-
397
+ // lifetime rewrite cache (D#40). An injected llm (tests) is used verbatim.
398
+ let rewriteLlm = llm;
399
+ let retries = 1;
400
+ let cache = false;
401
+ if (!rewriteLlm) {
402
+ if (auto) { rewriteLlm = makeAutoLlm(); retries = 0; cache = true; }
403
+ else rewriteLlm = defaultLLM;
404
+ }
405
+ const variants = await rewriteQuery(query, { llm: rewriteLlm, retries, cache });
222
406
  const lists = variants.map((v, i) => {
223
407
  // variant[0] is the ORIGINAL query: let an engine error propagate exactly as
224
408
  // it does on the single-query baseline path, so "never worse than baseline"
package/haiku-client.mjs CHANGED
@@ -5,7 +5,7 @@
5
5
  // Model configurable via CLAUDE_MEM_MODEL (haiku|sonnet); OpenRouter slug
6
6
  // overridable via OPENROUTER_MODEL
7
7
 
8
- import { execFileSync } from 'child_process';
8
+ import { execFileSync, spawn } from 'child_process';
9
9
  import { readFileSync } from 'fs';
10
10
  import { join } from 'path';
11
11
  import { randomUUID } from 'crypto';
@@ -247,6 +247,44 @@ export async function callModelJSON(prompt, model = 'haiku', opts) {
247
247
  return parseJsonFromLLM(result.text);
248
248
  }
249
249
 
250
/**
 * JSON-returning, FULLY-ASYNC model call for the long-lived server hot path
 * (deep-search auto-escalation, D#40).
 *
 * Dispatch, by `detectMode()`:
 *   - 'cli'  → callModelCLIAsync only.
 *   - 'api'  → callModelAPI, then callModelCLIAsync if that throws or yields
 *              no text.
 *   - other  → callOpenRouterAPI, with the same callModelCLIAsync fallback.
 *
 * Every CLI invocation here goes through the non-blocking callModelCLIAsync
 * rather than the blocking execFileSync-based callModelCLI, so a keyed-provider
 * outage cannot freeze the MCP event loop.
 *
 * NOTE(review): the fallback is handed the full `timeout` again, so worst-case
 * latency in keyed modes looks like up to 2 × timeout — confirm that is the
 * intended budget for the fail-fast auto path.
 *
 * @param {string|{system?:string,user:string}} prompt falsy → resolves null
 * @param {'haiku'|'sonnet'} model unknown names fall back to 'haiku'
 * @param {{timeout?:number,maxTokens?:number,temperature?:number}} [opts]
 * @returns {Promise<object|null>} result of parseJsonFromLLM on the reply
 *   text, or null when no text was obtained
 */
export async function callModelJSONAsync(prompt, model = 'haiku', { timeout = 15000, maxTokens = 1000, temperature = DEFAULT_LLM_TEMPERATURE } = {}) {
  if (!prompt) return null;

  const resolvedModel = MODEL_MAP[model] ? model : 'haiku';
  const mode = detectMode();

  // Shared by the cli-mode primary path and the keyed-provider fallback.
  const viaAsyncCli = async () => {
    const cliReply = await callModelCLIAsync(prompt, resolvedModel, { timeout });
    return cliReply?.text ? parseJsonFromLLM(cliReply.text) : null;
  };

  if (mode === 'cli') return viaAsyncCli();

  // Keyed provider (api/openrouter): a throw is logged and treated like an
  // empty reply, so both cases degrade to the async CLI below.
  const providerOptions = { timeout, maxTokens, temperature };
  let primary = null;
  try {
    if (mode === 'api') {
      primary = await callModelAPI(prompt, resolvedModel, providerOptions);
    } else {
      primary = await callOpenRouterAPI(prompt, resolvedModel, providerOptions);
    }
  } catch (e) {
    debugCatch(e, `callModelJSONAsync:${mode}:${resolvedModel}`);
  }
  if (primary?.text) return parseJsonFromLLM(primary.text);

  return viaAsyncCli();
}
287
+
250
288
  async function callModelAPI(prompt, model, { timeout, maxTokens, temperature = DEFAULT_LLM_TEMPERATURE }) {
251
289
  const apiKey = process.env.ANTHROPIC_API_KEY;
252
290
  if (!apiKey) return null;
@@ -319,6 +357,72 @@ function callModelCLI(prompt, model, { timeout }) {
319
357
  }
320
358
  }
321
359
 
360
/**
 * Async, non-blocking sibling of callModelCLI for the long-lived MCP server hot
 * path (deep-search auto-escalation, D#40). execFileSync blocks the event loop
 * for the whole subprocess lifetime — acceptable in short-lived hook processes,
 * not inside an MCP request handler.
 *
 * The prompt is written to the child's stdin (after flattenForCLI), so the
 * untrusted query stays out of argv. The returned promise never rejects:
 *   - child closes with non-empty trimmed stdout → `{ text }`
 *   - child closes with empty stdout, fails to spawn, or emits 'error' → null
 *   - timeout → the child is SIGKILLed with NO retry (fail-fast); if the
 *     trimmed stdout collected so far starts with `{`, ends with `}` and
 *     passes JSON.parse it is returned as `{ text }`, otherwise null.
 *
 * A missing, non-numeric or non-positive `timeout` falls back to 15000 ms
 * (the default callModelJSONAsync uses). Without that guard setTimeout would
 * fire almost immediately and kill the child before it could answer.
 *
 * @param {string|{system?:string,user:string}} prompt
 * @param {'haiku'|'sonnet'} model unknown names fall back to 'haiku'
 * @param {{timeout?:number}} [opts] SIGKILL after `timeout` ms; no retry.
 * @returns {Promise<{text:string}|null>}
 */
export function callModelCLIAsync(prompt, model, { timeout } = {}) {
  const FALLBACK_TIMEOUT_MS = 15000;
  const requestedMs = Number(timeout);
  const budgetMs = Number.isFinite(requestedMs) && requestedMs > 0 ? requestedMs : FALLBACK_TIMEOUT_MS;

  return new Promise((resolve) => {
    const modelName = MODEL_MAP[model] ? model : 'haiku';
    let child;
    try {
      child = spawn(getClaudePath(), ['-p', '--model', modelName], {
        env: { ...process.env, CLAUDE_MEM_HOOK_RUNNING: '1' },
        cwd: '/tmp',
        stdio: ['pipe', 'pipe', 'pipe'],
      });
    } catch (e) {
      debugCatch(e, `${model}-cli-async`);
      resolve(null);
      return;
    }

    let stdout = '';
    let settled = false;
    // Declared before `done` so the closure never touches an uninitialized binding.
    let timer = null;

    // Single exit point: the first of timeout / 'error' / 'close' wins.
    const done = (val) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      resolve(val);
    };

    timer = setTimeout(() => {
      try { child.kill('SIGKILL'); } catch { /* already gone */ }
      // Salvage only a payload that is already complete, bare JSON.
      const t = stdout.trim();
      if (t.startsWith('{') && t.endsWith('}')) {
        try { JSON.parse(t); done({ text: t }); return; } catch { /* not complete JSON */ }
      }
      done(null);
    }, budgetMs);

    child.stdout?.setEncoding('utf8'); // decode multi-byte UTF-8 (CJK) across chunk boundaries
    child.stdout?.on('data', (d) => { stdout += d; });
    child.stderr?.on('data', () => {}); // drain stderr so a chatty child can't block on a full pipe
    child.on('error', (e) => { debugCatch(e, `${model}-cli-async`); done(null); });
    child.on('close', () => {
      const t = stdout.trim();
      done(t ? { text: t } : null);
    });

    // EPIPE guard: the child may exit before we finish writing stdin.
    child.stdin?.on('error', () => {});
    try {
      child.stdin?.write(flattenForCLI(prompt));
      child.stdin?.end();
    } catch (e) {
      // Not fatal here: 'close', 'error' or the timeout will settle the promise.
      debugCatch(e, `${model}-cli-async:stdin`);
    }
  });
}
425
+
322
426
  // ─── API Mode ────────────────────────────────────────────────────────────────
323
427
 
324
428
  async function callHaikuAPI(prompt, { timeout, maxTokens, temperature = DEFAULT_LLM_TEMPERATURE }) {
package/mem-cli.mjs CHANGED
@@ -10,7 +10,7 @@ import { TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
10
10
  import { _resetVocabCache } from './tfidf.mjs';
11
11
  import { autoBoostIfNeeded, reRankWithContext, markSuperseded } from './server-internals.mjs';
12
12
  import { searchObservationsHybrid, countSearchTotal } from './search-engine.mjs';
13
- import { deepSearch } from './deep-search.mjs';
13
+ import { deepSearch, resolveDeepMode, shouldEscalateToDeep, autoDeepLlmReady, hasEscalatableCorpus } from './deep-search.mjs';
14
14
  import { ensureRegistryDb, upsertResource } from './registry.mjs';
15
15
  import { searchResources } from './registry-retriever.mjs';
16
16
  import { selectCompressionCandidates, groupByProjectWeek, compressGroup } from './lib/compress-core.mjs';
@@ -48,11 +48,11 @@ import {
48
48
 
49
49
  // ─── Commands ────────────────────────────────────────────────────────────────
50
50
 
51
- async function cmdSearch(db, args) {
51
+ async function cmdSearch(db, args, { llm } = {}) {
52
52
  const { positional, flags } = parseArgs(args);
53
53
  const query = positional.join(' ');
54
54
  if (!query) {
55
- fail('[mem] Usage: claude-mem-lite search <query> [--type TYPE] [--source SOURCE] [--limit N] [--project P] [--from DATE] [--to DATE] [--importance N] [--branch B] [--offset N] [--sort relevance|time|importance] [--include-noise] [--deep]');
55
+ fail('[mem] Usage: claude-mem-lite search <query> [--type TYPE] [--source SOURCE] [--limit N] [--project P] [--from DATE] [--to DATE] [--importance N] [--branch B] [--offset N] [--sort relevance|time|importance] [--include-noise] [--deep] [--no-deep]');
56
56
  return;
57
57
  }
58
58
 
@@ -103,7 +103,11 @@ async function cmdSearch(db, args) {
103
103
  // --deep: opt-in LLM multi-query / HyDE deep search (deep-search.mjs). Costs one
104
104
  // Haiku call + N hybrid searches; observations-only. NOT the passive path — this
105
105
  // is the explicit "search harder" lever for vocabulary-mismatch recall misses.
106
- const deep = flags.deep === true || flags.deep === 'true';
106
+ // --deep forces deep; --no-deep forces normal; neither = unset (env/default decide).
107
+ const explicitDeep = (flags.deep === true || flags.deep === 'true')
108
+ ? true
109
+ : ((flags['no-deep'] === true || flags['no-deep'] === 'true') ? false : undefined);
110
+ const deepMode = resolveDeepMode(explicitDeep, { surface: 'cli' });
107
111
 
108
112
  if (source && !['observations', 'sessions', 'prompts'].includes(source)) {
109
113
  fail(`[mem] Invalid --source "${source}". Use: observations, sessions, prompts`);
@@ -113,13 +117,13 @@ async function cmdSearch(db, args) {
113
117
  const ftsQuery = buildSearchFtsQuery(query, { or: useOr });
114
118
  // --deep proceeds even when the literal query sanitizes to nothing — its LLM
115
119
  // rewrite may still produce searchable variants (F3, parity with server.mjs).
116
- if (!ftsQuery && !deep) {
120
+ if (!ftsQuery && deepMode === 'normal') {
117
121
  fail(`[mem] No valid search terms in "${query}"`);
118
122
  return;
119
123
  }
120
124
  // --deep ignores --or: each variant runs AND + the engine's built-in
121
125
  // OR-fallback, so --or has no effect on the deep path — say so (F8).
122
- if (deep && useOr) {
126
+ if (deepMode === 'deep' && useOr) {
123
127
  process.stderr.write('[mem] Note: --or has no effect with --deep (variants use AND + engine OR-fallback)\n');
124
128
  }
125
129
 
@@ -135,10 +139,10 @@ async function cmdSearch(db, args) {
135
139
  // who passed --branch expecting a branch-scoped result.
136
140
  // --deep is observations-only (deepSearch fuses searchObservationsHybrid lists);
137
141
  // it overrides --source and the obs-only filter inference.
138
- if (deep && source && source !== 'observations') {
142
+ if (deepMode === 'deep' && source && source !== 'observations') {
139
143
  process.stderr.write(`[mem] Note: --deep searches observations only; ignoring --source ${source}\n`);
140
144
  }
141
- const effectiveSource = deep
145
+ const effectiveSource = deepMode === 'deep'
142
146
  ? 'observations'
143
147
  : (source || ((type || tier || minImportance || branch) ? 'observations' : null));
144
148
 
@@ -156,14 +160,29 @@ async function cmdSearch(db, args) {
156
160
  let orFallbackFired = false;
157
161
 
158
162
  let deepVariants = null;
163
+ let isDeep = deepMode === 'deep';
164
+
159
165
  // Search observations — shared engine with server.mjs (#8198/#8212 paired-path fix)
160
166
  if (!effectiveSource || effectiveSource === 'observations') {
161
- let obsResults;
162
- if (deep) {
163
- // Opt-in deep search: rewrite the query into variants (keyword / concept /
164
- // HyDE), run each through the hybrid engine, RRF-fuse. Collapses to the
165
- // single query when the rewrite yields nothing — never worse than baseline
166
- // (deep-search.mjs). Over-fetch perSourceLimit so the offset/slice below has room.
167
+ const obsCtx = {
168
+ ftsQuery,
169
+ args: {
170
+ project: project || null,
171
+ obs_type: type || null,
172
+ importance: minImportance || null,
173
+ branch: branch || null,
174
+ include_noise: includeNoise,
175
+ },
176
+ epochFrom: dateFrom,
177
+ epochTo: dateTo,
178
+ perSourceLimit,
179
+ perSourceOffset,
180
+ currentProject: project ? null : inferProject(),
181
+ limit,
182
+ orFallbackFired: false,
183
+ };
184
+
185
+ const runDeep = async ({ auto = false } = {}) => {
167
186
  const ds = await deepSearch(db, {
168
187
  query,
169
188
  project: project || null,
@@ -175,34 +194,27 @@ async function cmdSearch(db, args) {
175
194
  epochTo: dateTo,
176
195
  limit: perSourceLimit,
177
196
  currentProject: project ? null : inferProject(),
178
- });
179
- obsResults = ds.results;
197
+ }, llm ? { llm } : { auto });
180
198
  deepVariants = ds.variants;
181
199
  if (deepVariants.length > 1) {
182
200
  process.stderr.write(`[mem] Deep search: rewrote into ${deepVariants.length} query variants, RRF-fused\n`);
183
201
  } else {
184
202
  process.stderr.write('[mem] Deep search: rewrite returned no usable variants; used original query only\n');
185
203
  }
204
+ return ds.results;
205
+ };
206
+
207
+ let obsResults;
208
+ if (deepMode === 'deep') {
209
+ obsResults = await runDeep();
186
210
  } else {
187
- const obsCtx = {
188
- ftsQuery,
189
- args: {
190
- project: project || null,
191
- obs_type: type || null,
192
- importance: minImportance || null,
193
- branch: branch || null,
194
- include_noise: includeNoise,
195
- },
196
- epochFrom: dateFrom,
197
- epochTo: dateTo,
198
- perSourceLimit,
199
- perSourceOffset,
200
- currentProject: project ? null : inferProject(),
201
- limit,
202
- orFallbackFired: false,
203
- };
204
211
  obsResults = searchObservationsHybrid(db, obsCtx);
205
212
  if (obsCtx.orFallbackFired) orFallbackFired = true;
213
+ if (deepMode === 'auto' && autoDeepLlmReady(process.env, llm) && shouldEscalateToDeep(obsResults, obsCtx) && hasEscalatableCorpus(db, project || null)) {
214
+ process.stderr.write(`[mem] auto-escalated to deep search (weak results: ${obsResults.length} hits)\n`);
215
+ obsResults = await runDeep({ auto: true });
216
+ isDeep = true;
217
+ }
206
218
  }
207
219
  for (const r of obsResults) results.push({ ...r, _source: 'obs', score: r.score ?? 0 });
208
220
 
@@ -215,7 +227,7 @@ async function cmdSearch(db, args) {
215
227
  }
216
228
 
217
229
  // Search sessions (shared engine with MCP mem_search — lib/search-core.mjs)
218
- if (!effectiveSource || effectiveSource === 'sessions') {
230
+ if ((!effectiveSource || effectiveSource === 'sessions') && !isDeep) {
219
231
  try {
220
232
  const sessRows = searchSessionsFts(db, {
221
233
  ftsQuery, project, projectBoost: project ? null : inferProject(),
@@ -226,7 +238,7 @@ async function cmdSearch(db, args) {
226
238
  }
227
239
 
228
240
  // Search prompts (shared engine incl. CJK precision gate + LIKE fallback)
229
- if (!effectiveSource || effectiveSource === 'prompts') {
241
+ if ((!effectiveSource || effectiveSource === 'prompts') && !isDeep) {
230
242
  try {
231
243
  const promptRows = searchPromptsFts(db, {
232
244
  query, ftsQuery, project,
@@ -238,7 +250,7 @@ async function cmdSearch(db, args) {
238
250
 
239
251
  if (results.length === 0) {
240
252
  if (jsonOutput) {
241
- out(JSON.stringify({ query, total: 0, returned: 0, offset, limit, deep, variants: deep ? deepVariants : undefined, results: [] }));
253
+ out(JSON.stringify({ query, total: 0, returned: 0, offset, limit, deep: isDeep, variants: isDeep ? deepVariants : undefined, results: [] }));
242
254
  } else {
243
255
  out(`[mem] No results for "${query}"`);
244
256
  }
@@ -280,7 +292,7 @@ async function cmdSearch(db, args) {
280
292
  // in `results` (deep is obs-only). countSearchTotal would instead count the
281
293
  // ORIGINAL query's FTS matches — wrong, and ~0 on the vocabulary-mismatch
282
294
  // queries deep exists for, which falsely shrinks the "N of M" total (F1).
283
- const total = deep
295
+ const total = isDeep
284
296
  ? results.length
285
297
  : Math.max(countSearchTotal(db, {
286
298
  effectiveSource,
@@ -296,7 +308,7 @@ async function cmdSearch(db, args) {
296
308
 
297
309
  if (paged.length === 0) {
298
310
  if (jsonOutput) {
299
- out(JSON.stringify({ query, total, returned: 0, offset, limit, deep, variants: deep ? deepVariants : undefined, results: [] }));
311
+ out(JSON.stringify({ query, total, returned: 0, offset, limit, deep: isDeep, variants: isDeep ? deepVariants : undefined, results: [] }));
300
312
  } else {
301
313
  out(`[mem] No results for "${query}" at offset ${offset}`);
302
314
  }
@@ -339,8 +351,8 @@ async function cmdSearch(db, args) {
339
351
  returned: paged.length,
340
352
  offset,
341
353
  limit,
342
- deep,
343
- variants: deep ? deepVariants : undefined,
354
+ deep: isDeep,
355
+ variants: isDeep ? deepVariants : undefined,
344
356
  relaxed_and_to_or: orFallbackFired && !useOr,
345
357
  mixed_sources: hasMixed,
346
358
  results: items,
@@ -504,6 +516,9 @@ const OBS_FIELDS = ['id', 'type', 'title', 'subtitle', 'narrative', 'text', 'fac
504
516
  // top; re-exported here for back-compat with existing importers
505
517
  // (tests/get-time-format.test.mjs).
506
518
  export { OBS_TIME_FIELDS, formatObsFieldValue };
519
/**
 * Test seam: forwards straight to cmdSearch, including its llm injection slot
 * (`opts.llm`), without going through ensureDb. Lets hermetic tests pass a
 * seeded :memory: db and a stub llm.
 */
export async function cmdSearchForTest(db, args, opts) {
  return cmdSearch(db, args, opts);
}
507
522
 
508
523
  function renderObsRows(db, ids, requestedFields) {
509
524
  const placeholders = ids.map(() => '?').join(',');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "3.1.2",
3
+ "version": "3.3.0",
4
4
  "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
5
5
  "type": "module",
6
6
  "packageManager": "npm@10.9.2",
package/server.mjs CHANGED
@@ -10,7 +10,7 @@ import { resolveProject as _resolveProjectShared } from './project-utils.mjs';
10
10
  import { ensureDb, DB_PATH, DB_DIR, REGISTRY_DB_PATH } from './schema.mjs';
11
11
  import { reRankWithContext, markSuperseded, autoBoostIfNeeded, runIdleCleanup, buildServerInstructions } from './server-internals.mjs';
12
12
  import { searchObservationsHybrid, countSearchTotal } from './search-engine.mjs';
13
- import { deepSearch } from './deep-search.mjs';
13
+ import { deepSearch, resolveDeepMode, shouldEscalateToDeep, autoDeepLlmReady, hasEscalatableCorpus } from './deep-search.mjs';
14
14
  import { selectCompressionCandidates, groupByProjectWeek, compressGroup } from './lib/compress-core.mjs';
15
15
  import { resolveAnchorToken, formatAnchorError, resolveQueryAnchor, fetchRecentTimeline, fetchTimelineWindow } from './lib/timeline-core.mjs';
16
16
  import { buildSearchFtsQuery, parseDateBounds, computePerSourceWindow, effectiveObsFtsQuery, searchSessionsFts, searchPromptsFts, normalizeCrossSourceScores, applyUserSort, applyTierFilter } from './lib/search-core.mjs';
@@ -168,16 +168,19 @@ function safeHandler(fn) {
168
168
 
169
169
  // Thin wrapper around the shared engine — keeps the existing call sites
170
170
  // (searchObservations(ctx)) without ferrying `db` through every layer.
171
+ // ctx.db is set by runSearchPipeline when an injected db is present (e.g. tests);
172
+ // falls back to the module-level db for the normal MCP handler path.
171
173
  function searchObservations(ctx) {
172
- return searchObservationsHybrid(db, ctx);
174
+ return searchObservationsHybrid(ctx.db ?? db, ctx);
173
175
  }
174
176
 
175
177
  function searchSessions(ctx) {
178
+ const _db = ctx.db ?? db;
176
179
  const { ftsQuery, searchType, args, epochFrom, epochTo, perSourceLimit, perSourceOffset, currentProject } = ctx;
177
180
  const results = [];
178
181
 
179
182
  if (ftsQuery) {
180
- const rows = searchSessionsFts(db, {
183
+ const rows = searchSessionsFts(_db, {
181
184
  ftsQuery, project: args.project ?? null,
182
185
  projectBoost: args.project ? null : currentProject,
183
186
  epochFrom, epochTo, perSourceLimit, perSourceOffset,
@@ -195,7 +198,7 @@ function searchSessions(ctx) {
195
198
  if (epochTo !== null) { wheres.push('created_at_epoch <= ?'); params.push(epochTo); }
196
199
  const where = wheres.length ? `WHERE ${wheres.join(' AND ')}` : '';
197
200
  params.push(perSourceLimit, perSourceOffset);
198
- const rows = db.prepare(`
201
+ const rows = _db.prepare(`
199
202
  SELECT id, request, completed, project, created_at, created_at_epoch
200
203
  FROM session_summaries ${where}
201
204
  ORDER BY created_at_epoch DESC
@@ -210,13 +213,14 @@ function searchSessions(ctx) {
210
213
  }
211
214
 
212
215
  function searchPrompts(ctx) {
216
+ const _db = ctx.db ?? db;
213
217
  const { ftsQuery, searchType, args, epochFrom, epochTo, perSourceLimit, perSourceOffset } = ctx;
214
218
  const results = [];
215
219
 
216
220
  if (ftsQuery) {
217
221
  // CJK precision gate + LIKE fallback live in the shared core (see
218
222
  // lib/search-core.mjs for the leak rationale).
219
- const rows = searchPromptsFts(db, {
223
+ const rows = searchPromptsFts(_db, {
220
224
  query: args.query, ftsQuery, project: args.project ?? null,
221
225
  epochFrom, epochTo, perSourceLimit, perSourceOffset,
222
226
  });
@@ -231,7 +235,7 @@ function searchPrompts(ctx) {
231
235
  if (epochTo !== null) { wheres.push('p.created_at_epoch <= ?'); params.push(epochTo); }
232
236
  const where = wheres.length ? `WHERE ${wheres.join(' AND ')}` : '';
233
237
  params.push(perSourceLimit, perSourceOffset);
234
- const rows = db.prepare(`
238
+ const rows = _db.prepare(`
235
239
  SELECT p.id, p.prompt_text, p.content_session_id, p.created_at, p.created_at_epoch
236
240
  FROM user_prompts p
237
241
  JOIN sdk_sessions s ON p.content_session_id = s.content_session_id
@@ -247,10 +251,10 @@ function searchPrompts(ctx) {
247
251
  return results;
248
252
  }
249
253
 
250
- function formatSearchOutput(paginatedResults, args, ftsQuery, totalCount, orFallbackFired = false) {
254
+ function formatSearchOutput(paginatedResults, args, ftsQuery, totalCount, orFallbackFired = false, isDeepSearch = false) {
251
255
  if (paginatedResults.length === 0) {
252
256
  const hint = [];
253
- if (args.deep) {
257
+ if (isDeepSearch) {
254
258
  // Deep search runs even when the literal query sanitizes to empty, so the
255
259
  // "query was filtered" hint below would be misleading — the LLM rewrite ran
256
260
  // N variants and simply found nothing (F9).
@@ -310,13 +314,17 @@ function formatSearchOutput(paginatedResults, args, ftsQuery, totalCount, orFall
310
314
 
311
315
  // ─── Tool: mem_search ───────────────────────────────────────────────────────
312
316
 
313
- server.registerTool(
314
- 'mem_search',
315
- {
316
- description: descriptionOf('mem_search'),
317
- inputSchema: memSearchSchema,
318
- },
319
- safeHandler(async (args) => {
317
+ // Exported for tests: runs the full mem_search pipeline against an explicit db
318
+ // with an optional injected llm (deepSearch dependency). The MCP tool handler
319
+ // calls this with the module db and the default llm.
320
+ // NOTE: resolveProject() inside runSearchPipeline closes over the module-level `db`,
321
+ // not the injected one. Tests that pass a project: arg via this seam will trigger
322
+ // resolveProject() against the real (module) DB, not the test DB.
323
+ export async function handleSearchForTest(db, args, { llm } = {}) {
324
+ return runSearchPipeline(db, args, { llm });
325
+ }
326
+
327
+ async function runSearchPipeline(db, args, { llm } = {}) {
320
328
  if (args.project) args = { ...args, project: resolveProject(args.project) };
321
329
  const limit = args.limit ?? 20;
322
330
  const offset = args.offset ?? 0;
@@ -338,46 +346,75 @@ server.registerTool(
338
346
  if (!bounds.ok) throw new Error(`Invalid date_${bounds.bad}: "${bounds.value}" (use ISO 8601 or YYYY-MM-DD)`);
339
347
  const { epochFrom, epochTo } = bounds;
340
348
 
349
+ // Resolve tri-state deep mode. MCP defaults to 'auto' (escalate on weak results)
350
+ // unless explicitly overridden via args.deep or CLAUDE_MEM_AUTO_DEEP env flag.
351
+ const deepMode = resolveDeepMode(args.deep, { surface: 'mcp' });
352
+
341
353
  // Early return when query was provided but sanitized to nothing (all FTS5
342
- // keywords/special chars). Skipped for deep search — its LLM rewrite may
343
- // still produce searchable variants from a query the FTS sanitizer rejects.
344
- if (args.query && !ftsQuery && !epochFrom && !epochTo && !args.obs_type && !args.importance && !args.deep) {
345
- return formatSearchOutput([], args, ftsQuery, 0);
354
+ // keywords/special chars). Skipped for deep/auto — deep's LLM rewrite may
355
+ // still produce searchable variants from a query the FTS sanitizer rejects,
356
+ // and auto could escalate similarly.
357
+ if (args.query && !ftsQuery && !epochFrom && !epochTo && !args.obs_type && !args.importance && deepMode === 'normal') {
358
+ return { ...formatSearchOutput([], args, ftsQuery, 0), escalated: false, results: [], total: 0, variants: null };
346
359
  }
347
360
 
348
361
  // When obs_type is specified, implicitly restrict to observations only.
349
- // --deep is observations-only too (deepSearch fuses hybrid-obs lists).
350
- const effectiveType = args.deep ? 'observations' : (searchType || (args.obs_type ? 'observations' : undefined));
362
+ // deep mode is observations-only too (deepSearch fuses hybrid-obs lists).
363
+ const effectiveType = deepMode === 'deep' ? 'observations' : (searchType || (args.obs_type ? 'observations' : undefined));
351
364
  const isCrossSource = !effectiveType;
352
- const ctx = { ftsQuery, searchType: effectiveType, args, epochFrom, epochTo, perSourceLimit, perSourceOffset, currentProject, limit };
365
+ const ctx = { db, ftsQuery, searchType: effectiveType, args, epochFrom, epochTo, perSourceLimit, perSourceOffset, currentProject, limit };
353
366
  const results = [];
354
367
  let deepVariants = null;
368
+ let isDeep = deepMode === 'deep';
369
+ let escalated = false;
370
+ let escalatedObsCount = 0;
371
+
372
+ // Helper: run deepSearch and load results into the shared `results` array.
373
+ const runDeepInto = async ({ auto = false } = {}) => {
374
+ const { results: deepRows, variants } = await deepSearch(db, {
375
+ query: args.query,
376
+ project: args.project || null,
377
+ type: args.obs_type || null,
378
+ importance: args.importance || null,
379
+ branch: args.branch || null,
380
+ includeNoise: args.include_noise === true,
381
+ epochFrom, epochTo,
382
+ limit: perSourceLimit,
383
+ currentProject,
384
+ }, llm ? { llm } : { auto });
385
+ // Safe to reset: sessions/prompts are pushed AFTER the obs block, so nothing is lost here.
386
+ results.length = 0;
387
+ results.push(...deepRows);
388
+ deepVariants = variants;
389
+ };
355
390
 
356
391
  if (!effectiveType || effectiveType === 'observations') {
357
- if (args.deep) {
392
+ if (deepMode === 'deep') {
358
393
  // Opt-in LLM multi-query/HyDE deep search: rewrite → per-variant hybrid
359
394
  // search → RRF fusion, collapsing to the single query (== baseline) when
360
395
  // the rewrite yields nothing (deep-search.mjs). Over-fetch perSourceLimit
361
396
  // so the pagination slice below has room.
362
- const { results: deepRows, variants } = await deepSearch(db, {
363
- query: args.query,
364
- project: args.project || null,
365
- type: args.obs_type || null,
366
- importance: args.importance || null,
367
- branch: args.branch || null,
368
- includeNoise: args.include_noise === true,
369
- epochFrom, epochTo,
370
- limit: perSourceLimit,
371
- currentProject,
372
- });
373
- results.push(...deepRows);
374
- deepVariants = variants;
397
+ await runDeepInto();
375
398
  } else {
376
399
  results.push(...searchObservations(ctx));
400
+ // Auto-escalate: if normal search is weak (too few results or OR fallback
401
+ // fired — a vocabulary-mismatch symptom), escalate to deep. ctx is mutated
402
+ // by searchObservations to set ctx.orFallbackFired when the AND→OR relaxation
403
+ // fires, so we read it here after the call.
404
+ // results is already obs-only here (sessions/prompts pushed below), but the
405
+ // filter makes the invariant explicit and robust to future reordering.
406
+ const obsCountBeforeEscalation = results.length;
407
+ if (deepMode === 'auto' && autoDeepLlmReady(process.env, llm) && shouldEscalateToDeep(results.filter(r => r.source === 'obs'), ctx) && hasEscalatableCorpus(db, args.project || null)) {
408
+ await runDeepInto({ auto: true });
409
+ isDeep = true;
410
+ escalated = true;
411
+ escalatedObsCount = obsCountBeforeEscalation;
412
+ }
377
413
  }
378
414
  }
379
- if (!effectiveType || effectiveType === 'sessions') results.push(...searchSessions(ctx));
380
- if (!effectiveType || effectiveType === 'prompts') results.push(...searchPrompts(ctx));
415
+ // Sessions and prompts are excluded when deep (obs-only invariant, #8735).
416
+ if ((!effectiveType || effectiveType === 'sessions') && !isDeep) results.push(...searchSessions(ctx));
417
+ if ((!effectiveType || effectiveType === 'prompts') && !isDeep) results.push(...searchPrompts(ctx));
381
418
 
382
419
  // Type-list fallback: when obs_type is specified and FTS finds nothing,
383
420
  // list recent observations of that type (user likely wants to browse by type)
@@ -421,7 +458,7 @@ server.registerTool(
421
458
  // ftsQuery but the rewrite still returned rows (F2). reRankWithContext + the
422
459
  // re-sort are FTS-rank operations; deep rows are already RRF-ranked, so on the
423
460
  // empty-ftsQuery deep path we tag-but-don't-reorder (keep RRF order).
424
- if ((ftsQuery || args.deep) && results.some(r => r.source === 'obs')) {
461
+ if ((ftsQuery || isDeep) && results.some(r => r.source === 'obs')) {
425
462
  const obsResults = results.filter(r => r.source === 'obs');
426
463
  if (ftsQuery) reRankWithContext(db, obsResults, currentProject);
427
464
  markSuperseded(obsResults);
@@ -445,11 +482,11 @@ server.registerTool(
445
482
  // results.length is NOT the population — count the real MATCH set instead. Clamp
446
483
  // to >= results.length so vector/concept-augmented obs rows are never undercounted.
447
484
  // (paired-path with mem-cli.mjs via shared countSearchTotal — #8217)
448
- // For --deep the population is the fused variant set already in `results`
449
- // (deep is obs-only, returned by deepSearch capped at perSourceLimit).
450
- // countSearchTotal would count the ORIGINAL query's FTS matches instead —
451
- // wrong, and ~0 on the vocabulary-mismatch queries deep exists for (F1).
452
- const totalBeforePagination = args.deep
485
+ // For deep (explicit or auto-escalated), the population is the fused variant set
486
+ // already in `results` (deep is obs-only, returned by deepSearch capped at
487
+ // perSourceLimit). countSearchTotal would count the ORIGINAL query's FTS matches
488
+ // instead — wrong, and ~0 on the vocabulary-mismatch queries deep exists for (F1).
489
+ const totalBeforePagination = isDeep
453
490
  ? results.length
454
491
  : Math.max(countSearchTotal(db, {
455
492
  effectiveSource: effectiveType || null,
@@ -463,16 +500,32 @@ server.registerTool(
463
500
  // Always apply pagination — single-source results can exceed SQL LIMIT due to expansion (concept co-occurrence, PRF, vector search)
464
501
  const paginatedResults = (offset > 0 || results.length > limit) ? results.slice(offset, offset + limit) : results;
465
502
 
466
- const output = formatSearchOutput(paginatedResults, args, ftsQuery, totalBeforePagination, ctx.orFallbackFired === true);
503
+ // Observability: announce auto-escalation on stderr (parity with CLI deep note).
504
+ if (escalated) process.stderr.write(`[mem] auto-escalated to deep search (weak results: ${escalatedObsCount} hits)\n`);
505
+
506
+ const output = formatSearchOutput(paginatedResults, args, ftsQuery, totalBeforePagination, ctx.orFallbackFired === true, isDeep);
467
507
  // Surface the rewrite to the calling agent (CLI prints this to stderr + JSON;
468
508
  // MCP had no signal at all — F13). Tells the agent whether deep actually
469
509
  // reformulated the query or collapsed to the single-query baseline.
470
- if (args.deep && deepVariants && output.content?.[0]?.type === 'text') {
510
+ if (isDeep && deepVariants && output.content?.[0]?.type === 'text') {
471
511
  output.content[0].text += deepVariants.length > 1
472
512
  ? `\n\n[deep search: rewrote into ${deepVariants.length} variants — ${deepVariants.slice(1).map(v => JSON.stringify(v)).join(', ')}]`
473
513
  : '\n\n[deep search: rewrite produced no usable variants; searched the original query only (== baseline)]';
474
514
  }
475
- return output;
515
+
516
+ // Return an object that exposes structured fields for tests + the MCP content blob.
517
+ return { ...output, results: paginatedResults, total: totalBeforePagination, escalated, variants: deepVariants };
518
+ }
519
+
520
+ server.registerTool(
521
+ 'mem_search',
522
+ {
523
+ description: descriptionOf('mem_search'),
524
+ inputSchema: memSearchSchema,
525
+ },
526
+ safeHandler(async (args) => {
527
+ const result = await runSearchPipeline(db, args, {});
528
+ return { content: result.content };
476
529
  })
477
530
  );
478
531
 
package/tool-schemas.mjs CHANGED
@@ -93,7 +93,7 @@ export const memSearchSchema = {
93
93
  sort: z.enum(['relevance', 'time', 'importance']).optional().describe('Sort order: relevance (default, BM25), time (newest first), importance (highest first)'),
94
94
  include_noise: z.boolean().optional().describe('Include hook-llm fallback titles ("Modified X", "Worked on X", raw error logs) — hidden by default as they have ~3% access rate'),
95
95
  or: coerceBool.optional().describe('Force OR semantics between query terms from the start (default: AND with automatic OR-fallback when AND returns 0). Aligns with CLI --or.'),
96
- deep: coerceBool.optional().describe('Opt-in LLM multi-query/HyDE deep search: one Haiku call rewrites the query into keyword/concept/HyDE variants, each runs the hybrid search, results RRF-fused. Observations-only; costs a Haiku call + seconds of latency. Use ONLY when a normal search missed because your wording differs from the stored terms (vocabulary mismatch). Default false; passive recall stays single-query.'),
96
+ deep: coerceBool.optional().describe('Tri-state LLM multi-query/HyDE deep search (observations-only). true=force; false=never; omit=AUTO (default ON for mem_search): a normal search that returns weak/few results auto-escalates with ONE Haiku call (query rewritten to keyword/concept/HyDE variants, RRF-fused). Set CLAUDE_MEM_AUTO_DEEP=0 to disable AUTO. Passive recall stays single-query.'),
97
97
  };
98
98
 
99
99
  export const memRecentSchema = {
@@ -350,7 +350,7 @@ export const tools = [
350
350
  ' - Investigating a concrete error keyword with obs_type="bugfix"\n' +
351
351
  ' - Looking for prior art on a module/feature before refactoring\n' +
352
352
  ' - User asks "have we seen this before" or references something not in visible context\n' +
353
- ' - A normal search missed — set deep=true to LLM-rewrite the query (slower)\n' +
353
+ ' - A normal search missed — weak results auto-escalate to deep (set deep=false to opt out)\n' +
354
354
  '\n' +
355
355
  'Equivalent CLI: ' + CLI_INVOKE + ' search "<query>" [--type bugfix] [--deep]',
356
356
  inputSchema: memSearchSchema,