npm - claude-mem-lite - Versions diffs - 3.1.2 → 3.3.0 - Mend

claude-mem-lite 3.1.2 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/.claude-plugin/marketplace.json +1 -1
package/.claude-plugin/plugin.json +1 -1
package/deep-search.mjs +193 -9
package/haiku-client.mjs +105 -1
package/mem-cli.mjs +55 -40
package/package.json +1 -1
package/server.mjs +100 -47
package/tool-schemas.mjs +2 -2

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -10,7 +10,7 @@
   "plugins": [
     {
       "name": "claude-mem-lite",
-      "version": "3.1.2",
+      "version": "3.3.0",
       "source": "./",
       "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark)."
     }

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-mem-lite",
-  "version": "3.1.2",
+  "version": "3.3.0",
   "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
   "author": {
     "name": "sdsrss"

package/deep-search.mjs CHANGED Viewed

@@ -38,6 +38,107 @@ import { RRF_K } from './tfidf.mjs';
 // original + up to 3 rewrites (keyword / concept-expansion / HyDE).
 export const MAX_VARIANTS = 4;
+// ─── Auto-escalation (opt-in adaptive deep search) ──────────────────────────
+// Result-count floor below which a normal search is "weak" enough to auto-escalate
+// to deepSearch. Calibrated against the deep-search benchmark fixtures; 3 is the
+// starting point (vocabulary-mismatch misses typically return 0-2 obs rows).
+export const AUTO_DEEP_MIN_RESULTS = 3;
+// Corpus-size floor below which auto-escalation is skipped entirely.
+// A near-empty store can't be rescued by HyDE/multi-query, so the Haiku call
+// would be wasted. Project-scoped when a project arg is provided, else global.
+export const AUTO_DEEP_MIN_CORPUS = 10;
+/**
+ * Cheap guard: does the project have enough stored observations for deep search
+ * to plausibly help? A near-empty store can't be rescued by HyDE/multi-query —
+ * skip escalation (and its Haiku call) there. Project-scoped when `project` is
+ * given, else global. Counts only live obs (not superseded/compressed).
+ * @returns {boolean} true if count >= min
+ */
+export function hasEscalatableCorpus(db, project, min = AUTO_DEEP_MIN_CORPUS) {
+  try {
+    const where = ['superseded_at IS NULL', 'COALESCE(compressed_into, 0) = 0'];
+    const params = [];
+    if (project) { where.push('project = ?'); params.push(project); }
+    const row = db.prepare(`SELECT COUNT(*) AS c FROM observations WHERE ${where.join(' AND ')}`).get(...params);
+    return (row?.c ?? 0) >= min;
+  } catch { return true; } // on any error, don't suppress escalation (fail open)
+}
+/**
+ * Is a usable LLM available for AUTO escalation? True when a stub/real llm is
+ * injected (tests), a FAST provider key is set, OR the claude-CLI fallback is
+ * enabled (D#40: default-on for CLI-auth users; kill switch
+ * CLAUDE_MEM_AUTO_DEEP_CLI=0). The CLI path is made safe for the long-lived
+ * server hot path by the async/fail-fast/throttled auto provider (deepSearch
+ * auto), not by being excluded as it was before D#40.
+ * @param {object} [env=process.env]
+ * @param {Function|undefined} [injectedLlm]
+ * @returns {boolean}
+ */
+export function autoDeepLlmReady(env = process.env, injectedLlm) {
+  if (injectedLlm) return true;
+  if (env.ANTHROPIC_API_KEY || env.OPENROUTER_API_KEY) return true;
+  // No provider key → detectMode() would be 'cli'. CLI-auth users get auto
+  // escalation by default; the burst/latency cost is bounded by the auto
+  // provider (fail-fast + throttle) and a failed rewrite degrades to baseline.
+  // Kill switch honors the common disable spellings, not just the exact '0'.
+  const off = String(env.CLAUDE_MEM_AUTO_DEEP_CLI ?? '').trim().toLowerCase();
+  return !(off === '0' || off === 'false' || off === 'no' || off === 'off');
+}
+/**
+ * Zero-LLM heuristic: are the normal-search results weak enough to warrant
+ * auto-escalating to deepSearch? Reads ONLY rows already in hand. Never calls
+ * an LLM, so the decision itself is free — only a positive verdict costs a
+ * Haiku call (the escalation).
+ *
+ * Weak when: too few results (count below minResults floor).
+ *
+ * NOTE: ctx.orFallbackFired was intentionally removed as an escalation trigger.
+ * orFallbackFired fires on SUCCESSFUL AND→OR recovery — when the fallback
+ * returns enough results it is a sign the query is working, not that it is
+ * weak. Escalating on a successful recovery (a) discards good results already
+ * in hand, (b) fires an unwanted LLM call, and (c) erases the AND→OR hint
+ * that surfaces to the caller. The genuinely-weak vocab-mismatch case (AND
+ * fails, OR also fails) is still caught: if OR recovers nothing, count is 0-2
+ * → escalates on count alone.
+ *
+ * @param {Array} results  normal-search rows
+ * @param {object} ctx     the hybrid ctx the engine mutated (unused; kept for
+ *                         backward-compat with callers that pass it)
+ * @param {object} [opts]
+ * @param {number} [opts.minResults=AUTO_DEEP_MIN_RESULTS]
+ * @returns {boolean}
+ */
+export function shouldEscalateToDeep(results, _ctx, { minResults = AUTO_DEEP_MIN_RESULTS } = {}) {
+  const n = Array.isArray(results) ? results.length : 0;
+  if (n < minResults) return true;
+  return false;
+}
+/**
+ * Resolve the tri-state deep mode. Precedence: explicit value > env flag >
+ * per-surface default.
+ * @param {boolean|undefined} explicitDeep  caller's deep value (undefined = not passed)
+ * @param {object} opts
+ * @param {'mcp'|'cli'} opts.surface
+ * @param {object} [opts.env=process.env]
+ * @returns {'deep'|'auto'|'normal'}
+ *   'deep'   — force deepSearch
+ *   'auto'   — run normal search, escalate if weak
+ *   'normal' — run normal search, never escalate
+ */
+export function resolveDeepMode(explicitDeep, { surface, env = process.env } = {}) {
+  if (explicitDeep === true) return 'deep';
+  if (explicitDeep === false) return 'normal';
+  const flag = env.CLAUDE_MEM_AUTO_DEEP;
+  if (flag === '0') return 'normal';
+  if (flag === '1') return 'auto';
+  return surface === 'mcp' ? 'auto' : 'normal';
+}
 // Echoes hook-llm.mjs MEMORY_INPUT_GUARD (kept inline rather than imported so
 // this module — and the tests that import it — never pull in hook-llm's
 // native-heavy chain; see #8729). Same security intent: the query is untrusted.
@@ -97,12 +198,75 @@ export function assembleVariants(query, parsed, { max = MAX_VARIANTS } = {}) {
   return out;
 }
-// Default provider: pulled in lazily so importing deep-search.mjs (e.g. in tests
-// with an injected llm) never loads the LLM client. callModelJSON returns parsed
-// JSON or null, and never throws.
+// ─── Auto-escalation safety machinery (D#40) ─────────────────────────────────
+// The AUTO path can fire on every weak search across the long-lived MCP server,
+// so it must be fail-fast (short timeout, no retry), throttled (bound bursts),
+// and cached (skip repeat rewrites). The EXPLICIT deep=true path stays patient.
+export const AUTO_DEEP_TIMEOUT_MS = 5000;   // fail-fast budget for the auto path; no retry
+export const AUTO_DEEP_THROTTLE_MS = 3000;  // min gap between auto LLM rewrites, per process (bounds spawn rate)
+const REWRITE_CACHE_MAX = 256;              // LRU cap for the query→variants cache
+let _lastAutoLlmAt = 0;
+const _rewriteCache = new Map(); // normalized query → variants (string[]); successes only
+/** Reset auto-path throttle + cache. Test-only; production state is per-process. */
+export function _resetAutoDeepState() { _lastAutoLlmAt = 0; _rewriteCache.clear(); }
+function cacheGet(key) {
+  if (!_rewriteCache.has(key)) return null;
+  const v = _rewriteCache.get(key);
+  _rewriteCache.delete(key); _rewriteCache.set(key, v); // LRU bump
+  return v.slice();
+}
+function cacheSet(key, variants) {
+  if (_rewriteCache.has(key)) _rewriteCache.delete(key);
+  _rewriteCache.set(key, variants.slice());
+  if (_rewriteCache.size > REWRITE_CACHE_MAX) {
+    _rewriteCache.delete(_rewriteCache.keys().next().value); // evict oldest
+  }
+}
+/**
+ * Wrap an llm so it fires at most once per `intervalMs` per process. A throttled
+ * call resolves null → rewriteQuery degrades to baseline (never worse). Exported
+ * for tests. Throttle state is module-global (shared across deepSearch calls).
+ *
+ * The clock advances on every ACTUAL call — success OR failure — deliberately:
+ * the throttle bounds the subprocess SPAWN RATE, and a failed spawn still costs a
+ * subprocess + its timeout, so a broken provider that always fails must be rate-
+ * limited too (gating only on success would let a persistent failure spawn on
+ * every weak search). The interval is kept short so one failure suppresses
+ * escalation only briefly, not for a long window.
+ */
+export function makeThrottled(llm, { intervalMs = AUTO_DEEP_THROTTLE_MS } = {}) {
+  return async (prompt) => {
+    const now = Date.now();
+    if (now - _lastAutoLlmAt < intervalMs) return null;
+    _lastAutoLlmAt = now;
+    return llm(prompt);
+  };
+}
+// Run one rewrite LLM call via the fully-async dispatcher (callModelJSONAsync):
+// every CLI invocation — cli-mode primary AND the post-provider-failure fallback
+// — is non-blocking, so an MCP request handler never blocks the event loop even
+// under a keyed-provider outage (D#40). Lazy import so tests with an injected llm
+// never load the LLM client.
+async function callRewriteLLM(prompt, { timeout }) {
+  const { callModelJSONAsync } = await import('./haiku-client.mjs');
+  return callModelJSONAsync(prompt, 'haiku', { timeout, maxTokens: 400 });
+}
+// Default (explicit deep=true) provider: patient timeout, no throttle/cache.
 async function defaultLLM(prompt) {
-  const { callModelJSON } = await import('./haiku-client.mjs');
-  return callModelJSON(prompt, 'haiku', { timeout: 12000, maxTokens: 400 });
+  return callRewriteLLM(prompt, { timeout: 12000 });
+}
+// Auto-path provider: fail-fast timeout + throttle. Built fresh per deepSearch
+// call; the throttle clock it reads is module-global (per-process).
+function makeAutoLlm() {
+  return makeThrottled((prompt) => callRewriteLLM(prompt, { timeout: AUTO_DEEP_TIMEOUT_MS }));
 }
 /**
@@ -113,11 +277,17 @@ async function defaultLLM(prompt) {
  * @param {object} [opts]
  * @param {(prompt: object) => Promise<object|null>} [opts.llm]
  * @param {number} [opts.retries=1]
+ * @param {boolean} [opts.cache=false]  memoize successful rewrites (auto path)
  * @returns {Promise<string[]>}
  */
-export async function rewriteQuery(query, { llm = defaultLLM, retries = 1 } = {}) {
+export async function rewriteQuery(query, { llm = defaultLLM, retries = 1, cache = false } = {}) {
   const original = String(query ?? '').trim();
   if (!original) return [];
+  const key = original.toLowerCase();
+  if (cache) {
+    const hit = cacheGet(key);
+    if (hit) return hit; // process-lifetime memo of a prior successful rewrite
+  }
   const prompt = buildRewritePrompt(original);
   for (let attempt = 0; attempt <= retries; attempt++) {
     let parsed;
@@ -127,7 +297,10 @@ export async function rewriteQuery(query, { llm = defaultLLM, retries = 1 } = {}
       parsed = null;
     }
     const variants = assembleVariants(original, parsed);
-    if (variants.length > 1) return variants; // got at least one real rewrite
+    if (variants.length > 1) { // got at least one real rewrite
+      if (cache) cacheSet(key, variants); // cache successes only — failures retry next time
+      return variants;
+    }
   }
   return [original]; // robust floor — single-query == baseline
 }
@@ -212,13 +385,24 @@ function defaultSearchFn(db, query, params) {
  * @param {(prompt:object)=>Promise<object|null>} [deps.llm]
  * @param {(db:Database, query:string, params:object)=>Array} [deps.searchFn]
  * @param {number} [deps.rrfK=RRF_K]
+ * @param {boolean} [deps.auto=false]  use the fail-fast/throttled/cached auto provider
  * @returns {Promise<{results: Array, variants: string[]}>}
  */
-export async function deepSearch(db, params, { llm = defaultLLM, searchFn = defaultSearchFn, rrfK = RRF_K } = {}) {
+export async function deepSearch(db, params, { llm, searchFn = defaultSearchFn, rrfK = RRF_K, auto = false } = {}) {
   const query = String(params?.query ?? '').trim();
   if (!query) return { results: [], variants: [] };
-  const variants = await rewriteQuery(query, { llm });
+  // No injected llm: EXPLICIT deep=true uses the patient defaultLLM; the AUTO
+  // path uses a fail-fast + throttled provider with no retry and a process-
+  // lifetime rewrite cache (D#40). An injected llm (tests) is used verbatim.
+  let rewriteLlm = llm;
+  let retries = 1;
+  let cache = false;
+  if (!rewriteLlm) {
+    if (auto) { rewriteLlm = makeAutoLlm(); retries = 0; cache = true; }
+    else rewriteLlm = defaultLLM;
+  }
+  const variants = await rewriteQuery(query, { llm: rewriteLlm, retries, cache });
   const lists = variants.map((v, i) => {
     // variant[0] is the ORIGINAL query: let an engine error propagate exactly as
     // it does on the single-query baseline path, so "never worse than baseline"

package/haiku-client.mjs CHANGED Viewed

@@ -5,7 +5,7 @@
 // Model configurable via CLAUDE_MEM_MODEL (haiku|sonnet); OpenRouter slug
 // overridable via OPENROUTER_MODEL
-import { execFileSync } from 'child_process';
+import { execFileSync, spawn } from 'child_process';
 import { readFileSync } from 'fs';
 import { join } from 'path';
 import { randomUUID } from 'crypto';
@@ -247,6 +247,44 @@ export async function callModelJSON(prompt, model = 'haiku', opts) {
   return parseJsonFromLLM(result.text);
 }
+/**
+ * JSON-returning, FULLY-ASYNC model call for the long-lived server hot path
+ * (deep-search auto-escalation). Like callModelJSON, but every CLI invocation —
+ * cli-mode primary AND the post-provider-failure fallback — uses the
+ * non-blocking callModelCLIAsync, so a keyed-provider outage can never drop onto
+ * the blocking execFileSync path and freeze the MCP event loop (D#40). Never
+ * throws; returns parsed JSON or null.
+ * @param {string|{system?:string,user:string}} prompt
+ * @param {'haiku'|'sonnet'} model
+ * @param {{timeout?:number,maxTokens?:number,temperature?:number}} [opts]
+ * @returns {Promise<object|null>}
+ */
+export async function callModelJSONAsync(prompt, model = 'haiku', { timeout = 15000, maxTokens = 1000, temperature = DEFAULT_LLM_TEMPERATURE } = {}) {
+  if (!prompt) return null;
+  const resolvedModel = MODEL_MAP[model] ? model : 'haiku';
+  const mode = detectMode();
+  if (mode === 'cli') {
+    const res = await callModelCLIAsync(prompt, resolvedModel, { timeout });
+    return res?.text ? parseJsonFromLLM(res.text) : null;
+  }
+  // Keyed provider (api/openrouter): try it, then degrade to the ASYNC CLI on any
+  // failure — NOT the blocking execFileSync callModelCLI that callModelJSON uses.
+  let primary = null;
+  try {
+    primary = mode === 'api'
+      ? await callModelAPI(prompt, resolvedModel, { timeout, maxTokens, temperature })
+      : await callOpenRouterAPI(prompt, resolvedModel, { timeout, maxTokens, temperature });
+  } catch (e) {
+    debugCatch(e, `callModelJSONAsync:${mode}:${resolvedModel}`);
+  }
+  if (primary?.text) return parseJsonFromLLM(primary.text);
+  const res = await callModelCLIAsync(prompt, resolvedModel, { timeout });
+  return res?.text ? parseJsonFromLLM(res.text) : null;
+}
 async function callModelAPI(prompt, model, { timeout, maxTokens, temperature = DEFAULT_LLM_TEMPERATURE }) {
   const apiKey = process.env.ANTHROPIC_API_KEY;
   if (!apiKey) return null;
@@ -319,6 +357,72 @@ function callModelCLI(prompt, model, { timeout }) {
   }
 }
+/**
+ * Async, non-blocking sibling of callModelCLI for the long-lived MCP server hot
+ * path (deep-search auto-escalation, D#40). execFileSync blocks the event loop for
+ * the whole subprocess lifetime — acceptable in short-lived hook processes
+ * (callModelCLI), not inside an MCP request handler. Uses spawn + stdin so the
+ * untrusted query stays out of argv (ps-visible) and the boundary-marker model is
+ * preserved. Never rejects: resolves {text} on non-empty stdout, null on
+ * error/empty. On timeout it SIGKILLs the child with NO retry (fail-fast) and
+ * salvages a complete JSON payload from partial stdout (mirrors callModelCLI's
+ * catch-salvage; tolerant of Haiku's ```json fencing per #8605, which the upstream
+ * parseJsonFromLLM strips).
+ * @param {string|{system?:string,user:string}} prompt
+ * @param {'haiku'|'sonnet'} model
+ * @param {{timeout:number}} opts  SIGKILL after `timeout` ms; no retry.
+ * @returns {Promise<{text:string}|null>}
+ */
+export function callModelCLIAsync(prompt, model, { timeout }) {
+  return new Promise((resolve) => {
+    const modelName = MODEL_MAP[model] ? model : 'haiku';
+    let child;
+    try {
+      child = spawn(getClaudePath(), ['-p', '--model', modelName], {
+        env: { ...process.env, CLAUDE_MEM_HOOK_RUNNING: '1' },
+        cwd: '/tmp',
+        stdio: ['pipe', 'pipe', 'pipe'],
+      });
+    } catch (e) {
+      debugCatch(e, `${model}-cli-async`);
+      resolve(null);
+      return;
+    }
+    let stdout = '';
+    let settled = false;
+    const done = (val) => {
+      if (settled) return;
+      settled = true;
+      clearTimeout(timer);
+      resolve(val);
+    };
+    const timer = setTimeout(() => {
+      try { child.kill('SIGKILL'); } catch { /* already gone */ }
+      const t = stdout.trim();
+      if (t.startsWith('{') && t.endsWith('}')) {
+        try { JSON.parse(t); done({ text: t }); return; } catch { /* not complete JSON */ }
+      }
+      done(null);
+    }, timeout);
+    child.stdout?.setEncoding('utf8'); // decode multi-byte UTF-8 (CJK) across chunk boundaries
+    child.stdout?.on('data', (d) => { stdout += d; });
+    child.stderr?.on('data', () => {}); // drain stderr so a chatty child can't block on a full pipe
+    child.on('error', (e) => { debugCatch(e, `${model}-cli-async`); done(null); });
+    child.on('close', () => {
+      const t = stdout.trim();
+      done(t ? { text: t } : null);
+    });
+    // EPIPE guard: the child may exit before we finish writing stdin.
+    child.stdin?.on('error', () => {});
+    try {
+      child.stdin?.write(flattenForCLI(prompt));
+      child.stdin?.end();
+    } catch (e) {
+      debugCatch(e, `${model}-cli-async:stdin`);
+    }
+  });
+}
 // ─── API Mode ────────────────────────────────────────────────────────────────
 async function callHaikuAPI(prompt, { timeout, maxTokens, temperature = DEFAULT_LLM_TEMPERATURE }) {

package/mem-cli.mjs CHANGED Viewed

@@ -10,7 +10,7 @@ import { TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
 import { _resetVocabCache } from './tfidf.mjs';
 import { autoBoostIfNeeded, reRankWithContext, markSuperseded } from './server-internals.mjs';
 import { searchObservationsHybrid, countSearchTotal } from './search-engine.mjs';
-import { deepSearch } from './deep-search.mjs';
+import { deepSearch, resolveDeepMode, shouldEscalateToDeep, autoDeepLlmReady, hasEscalatableCorpus } from './deep-search.mjs';
 import { ensureRegistryDb, upsertResource } from './registry.mjs';
 import { searchResources } from './registry-retriever.mjs';
 import { selectCompressionCandidates, groupByProjectWeek, compressGroup } from './lib/compress-core.mjs';
@@ -48,11 +48,11 @@ import {
 // ─── Commands ────────────────────────────────────────────────────────────────
-async function cmdSearch(db, args) {
+async function cmdSearch(db, args, { llm } = {}) {
   const { positional, flags } = parseArgs(args);
   const query = positional.join(' ');
   if (!query) {
-    fail('[mem] Usage: claude-mem-lite search <query> [--type TYPE] [--source SOURCE] [--limit N] [--project P] [--from DATE] [--to DATE] [--importance N] [--branch B] [--offset N] [--sort relevance|time|importance] [--include-noise] [--deep]');
+    fail('[mem] Usage: claude-mem-lite search <query> [--type TYPE] [--source SOURCE] [--limit N] [--project P] [--from DATE] [--to DATE] [--importance N] [--branch B] [--offset N] [--sort relevance|time|importance] [--include-noise] [--deep] [--no-deep]');
     return;
   }
@@ -103,7 +103,11 @@ async function cmdSearch(db, args) {
   // --deep: opt-in LLM multi-query / HyDE deep search (deep-search.mjs). Costs one
   // Haiku call + N hybrid searches; observations-only. NOT the passive path — this
   // is the explicit "search harder" lever for vocabulary-mismatch recall misses.
-  const deep = flags.deep === true || flags.deep === 'true';
+  // --deep forces deep; --no-deep forces normal; neither = unset (env/default decide).
+  const explicitDeep = (flags.deep === true || flags.deep === 'true')
+    ? true
+    : ((flags['no-deep'] === true || flags['no-deep'] === 'true') ? false : undefined);
+  const deepMode = resolveDeepMode(explicitDeep, { surface: 'cli' });
   if (source && !['observations', 'sessions', 'prompts'].includes(source)) {
     fail(`[mem] Invalid --source "${source}". Use: observations, sessions, prompts`);
@@ -113,13 +117,13 @@ async function cmdSearch(db, args) {
   const ftsQuery = buildSearchFtsQuery(query, { or: useOr });
   // --deep proceeds even when the literal query sanitizes to nothing — its LLM
   // rewrite may still produce searchable variants (F3, parity with server.mjs).
-  if (!ftsQuery && !deep) {
+  if (!ftsQuery && deepMode === 'normal') {
     fail(`[mem] No valid search terms in "${query}"`);
     return;
   }
   // --deep ignores --or: each variant runs AND + the engine's built-in
   // OR-fallback, so --or has no effect on the deep path — say so (F8).
-  if (deep && useOr) {
+  if (deepMode === 'deep' && useOr) {
     process.stderr.write('[mem] Note: --or has no effect with --deep (variants use AND + engine OR-fallback)\n');
   }
@@ -135,10 +139,10 @@ async function cmdSearch(db, args) {
   // who passed --branch expecting a branch-scoped result.
   // --deep is observations-only (deepSearch fuses searchObservationsHybrid lists);
   // it overrides --source and the obs-only filter inference.
-  if (deep && source && source !== 'observations') {
+  if (deepMode === 'deep' && source && source !== 'observations') {
     process.stderr.write(`[mem] Note: --deep searches observations only; ignoring --source ${source}\n`);
   }
-  const effectiveSource = deep
+  const effectiveSource = deepMode === 'deep'
     ? 'observations'
     : (source || ((type || tier || minImportance || branch) ? 'observations' : null));
@@ -156,14 +160,29 @@ async function cmdSearch(db, args) {
   let orFallbackFired = false;
   let deepVariants = null;
+  let isDeep = deepMode === 'deep';
   // Search observations — shared engine with server.mjs (#8198/#8212 paired-path fix)
   if (!effectiveSource || effectiveSource === 'observations') {
-    let obsResults;
-    if (deep) {
-      // Opt-in deep search: rewrite the query into variants (keyword / concept /
-      // HyDE), run each through the hybrid engine, RRF-fuse. Collapses to the
-      // single query when the rewrite yields nothing — never worse than baseline
-      // (deep-search.mjs). Over-fetch perSourceLimit so the offset/slice below has room.
+    const obsCtx = {
+      ftsQuery,
+      args: {
+        project: project || null,
+        obs_type: type || null,
+        importance: minImportance || null,
+        branch: branch || null,
+        include_noise: includeNoise,
+      },
+      epochFrom: dateFrom,
+      epochTo: dateTo,
+      perSourceLimit,
+      perSourceOffset,
+      currentProject: project ? null : inferProject(),
+      limit,
+      orFallbackFired: false,
+    };
+    const runDeep = async ({ auto = false } = {}) => {
       const ds = await deepSearch(db, {
         query,
         project: project || null,
@@ -175,34 +194,27 @@ async function cmdSearch(db, args) {
         epochTo: dateTo,
         limit: perSourceLimit,
         currentProject: project ? null : inferProject(),
-      });
-      obsResults = ds.results;
+      }, llm ? { llm } : { auto });
       deepVariants = ds.variants;
       if (deepVariants.length > 1) {
         process.stderr.write(`[mem] Deep search: rewrote into ${deepVariants.length} query variants, RRF-fused\n`);
       } else {
         process.stderr.write('[mem] Deep search: rewrite returned no usable variants; used original query only\n');
       }
+      return ds.results;
+    };
+    let obsResults;
+    if (deepMode === 'deep') {
+      obsResults = await runDeep();
     } else {
-      const obsCtx = {
-        ftsQuery,
-        args: {
-          project: project || null,
-          obs_type: type || null,
-          importance: minImportance || null,
-          branch: branch || null,
-          include_noise: includeNoise,
-        },
-        epochFrom: dateFrom,
-        epochTo: dateTo,
-        perSourceLimit,
-        perSourceOffset,
-        currentProject: project ? null : inferProject(),
-        limit,
-        orFallbackFired: false,
-      };
       obsResults = searchObservationsHybrid(db, obsCtx);
       if (obsCtx.orFallbackFired) orFallbackFired = true;
+      if (deepMode === 'auto' && autoDeepLlmReady(process.env, llm) && shouldEscalateToDeep(obsResults, obsCtx) && hasEscalatableCorpus(db, project || null)) {
+        process.stderr.write(`[mem] auto-escalated to deep search (weak results: ${obsResults.length} hits)\n`);
+        obsResults = await runDeep({ auto: true });
+        isDeep = true;
+      }
     }
     for (const r of obsResults) results.push({ ...r, _source: 'obs', score: r.score ?? 0 });
@@ -215,7 +227,7 @@ async function cmdSearch(db, args) {
   }
   // Search sessions (shared engine with MCP mem_search — lib/search-core.mjs)
-  if (!effectiveSource || effectiveSource === 'sessions') {
+  if ((!effectiveSource || effectiveSource === 'sessions') && !isDeep) {
     try {
       const sessRows = searchSessionsFts(db, {
         ftsQuery, project, projectBoost: project ? null : inferProject(),
@@ -226,7 +238,7 @@ async function cmdSearch(db, args) {
   }
   // Search prompts (shared engine incl. CJK precision gate + LIKE fallback)
-  if (!effectiveSource || effectiveSource === 'prompts') {
+  if ((!effectiveSource || effectiveSource === 'prompts') && !isDeep) {
     try {
       const promptRows = searchPromptsFts(db, {
         query, ftsQuery, project,
@@ -238,7 +250,7 @@ async function cmdSearch(db, args) {
   if (results.length === 0) {
     if (jsonOutput) {
-      out(JSON.stringify({ query, total: 0, returned: 0, offset, limit, deep, variants: deep ? deepVariants : undefined, results: [] }));
+      out(JSON.stringify({ query, total: 0, returned: 0, offset, limit, deep: isDeep, variants: isDeep ? deepVariants : undefined, results: [] }));
     } else {
       out(`[mem] No results for "${query}"`);
     }
@@ -280,7 +292,7 @@ async function cmdSearch(db, args) {
   // in `results` (deep is obs-only). countSearchTotal would instead count the
   // ORIGINAL query's FTS matches — wrong, and ~0 on the vocabulary-mismatch
   // queries deep exists for, which falsely shrinks the "N of M" total (F1).
-  const total = deep
+  const total = isDeep
     ? results.length
     : Math.max(countSearchTotal(db, {
       effectiveSource,
@@ -296,7 +308,7 @@ async function cmdSearch(db, args) {
   if (paged.length === 0) {
     if (jsonOutput) {
-      out(JSON.stringify({ query, total, returned: 0, offset, limit, deep, variants: deep ? deepVariants : undefined, results: [] }));
+      out(JSON.stringify({ query, total, returned: 0, offset, limit, deep: isDeep, variants: isDeep ? deepVariants : undefined, results: [] }));
     } else {
       out(`[mem] No results for "${query}" at offset ${offset}`);
     }
@@ -339,8 +351,8 @@ async function cmdSearch(db, args) {
       returned: paged.length,
       offset,
       limit,
-      deep,
-      variants: deep ? deepVariants : undefined,
+      deep: isDeep,
+      variants: isDeep ? deepVariants : undefined,
       relaxed_and_to_or: orFallbackFired && !useOr,
       mixed_sources: hasMixed,
       results: items,
@@ -504,6 +516,9 @@ const OBS_FIELDS = ['id', 'type', 'title', 'subtitle', 'narrative', 'text', 'fac
 // top; re-exported here for back-compat with existing importers
 // (tests/get-time-format.test.mjs).
 export { OBS_TIME_FIELDS, formatObsFieldValue };
+// Test seam: exposes cmdSearch with the llm injection slot without going through
+// ensureDb — lets hermetic tests pass a seeded :memory: db and a stub llm.
+export async function cmdSearchForTest(db, args, opts) { return cmdSearch(db, args, opts); }
 function renderObsRows(db, ids, requestedFields) {
   const placeholders = ids.map(() => '?').join(',');

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-mem-lite",
-  "version": "3.1.2",
+  "version": "3.3.0",
   "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
   "type": "module",
   "packageManager": "npm@10.9.2",

package/server.mjs CHANGED Viewed

@@ -10,7 +10,7 @@ import { resolveProject as _resolveProjectShared } from './project-utils.mjs';
 import { ensureDb, DB_PATH, DB_DIR, REGISTRY_DB_PATH } from './schema.mjs';
 import { reRankWithContext, markSuperseded, autoBoostIfNeeded, runIdleCleanup, buildServerInstructions } from './server-internals.mjs';
 import { searchObservationsHybrid, countSearchTotal } from './search-engine.mjs';
-import { deepSearch } from './deep-search.mjs';
+import { deepSearch, resolveDeepMode, shouldEscalateToDeep, autoDeepLlmReady, hasEscalatableCorpus } from './deep-search.mjs';
 import { selectCompressionCandidates, groupByProjectWeek, compressGroup } from './lib/compress-core.mjs';
 import { resolveAnchorToken, formatAnchorError, resolveQueryAnchor, fetchRecentTimeline, fetchTimelineWindow } from './lib/timeline-core.mjs';
 import { buildSearchFtsQuery, parseDateBounds, computePerSourceWindow, effectiveObsFtsQuery, searchSessionsFts, searchPromptsFts, normalizeCrossSourceScores, applyUserSort, applyTierFilter } from './lib/search-core.mjs';
@@ -168,16 +168,19 @@ function safeHandler(fn) {
 // Thin wrapper around the shared engine — keeps the existing call sites
 // (searchObservations(ctx)) without ferrying `db` through every layer.
+// ctx.db is set by runSearchPipeline when an injected db is present (e.g. tests);
+// falls back to the module-level db for the normal MCP handler path.
 function searchObservations(ctx) {
-  return searchObservationsHybrid(db, ctx);
+  return searchObservationsHybrid(ctx.db ?? db, ctx);
 }
 function searchSessions(ctx) {
+  const _db = ctx.db ?? db;
   const { ftsQuery, searchType, args, epochFrom, epochTo, perSourceLimit, perSourceOffset, currentProject } = ctx;
   const results = [];
   if (ftsQuery) {
-    const rows = searchSessionsFts(db, {
+    const rows = searchSessionsFts(_db, {
       ftsQuery, project: args.project ?? null,
       projectBoost: args.project ? null : currentProject,
       epochFrom, epochTo, perSourceLimit, perSourceOffset,
@@ -195,7 +198,7 @@ function searchSessions(ctx) {
     if (epochTo !== null) { wheres.push('created_at_epoch <= ?'); params.push(epochTo); }
     const where = wheres.length ? `WHERE ${wheres.join(' AND ')}` : '';
     params.push(perSourceLimit, perSourceOffset);
-    const rows = db.prepare(`
+    const rows = _db.prepare(`
       SELECT id, request, completed, project, created_at, created_at_epoch
       FROM session_summaries ${where}
       ORDER BY created_at_epoch DESC
@@ -210,13 +213,14 @@ function searchSessions(ctx) {
 }
 function searchPrompts(ctx) {
+  const _db = ctx.db ?? db;
   const { ftsQuery, searchType, args, epochFrom, epochTo, perSourceLimit, perSourceOffset } = ctx;
   const results = [];
   if (ftsQuery) {
     // CJK precision gate + LIKE fallback live in the shared core (see
     // lib/search-core.mjs for the leak rationale).
-    const rows = searchPromptsFts(db, {
+    const rows = searchPromptsFts(_db, {
       query: args.query, ftsQuery, project: args.project ?? null,
       epochFrom, epochTo, perSourceLimit, perSourceOffset,
     });
@@ -231,7 +235,7 @@ function searchPrompts(ctx) {
     if (epochTo !== null) { wheres.push('p.created_at_epoch <= ?'); params.push(epochTo); }
     const where = wheres.length ? `WHERE ${wheres.join(' AND ')}` : '';
     params.push(perSourceLimit, perSourceOffset);
-    const rows = db.prepare(`
+    const rows = _db.prepare(`
       SELECT p.id, p.prompt_text, p.content_session_id, p.created_at, p.created_at_epoch
       FROM user_prompts p
       JOIN sdk_sessions s ON p.content_session_id = s.content_session_id
@@ -247,10 +251,10 @@ function searchPrompts(ctx) {
   return results;
 }
-function formatSearchOutput(paginatedResults, args, ftsQuery, totalCount, orFallbackFired = false) {
+function formatSearchOutput(paginatedResults, args, ftsQuery, totalCount, orFallbackFired = false, isDeepSearch = false) {
   if (paginatedResults.length === 0) {
     const hint = [];
-    if (args.deep) {
+    if (isDeepSearch) {
       // Deep search runs even when the literal query sanitizes to empty, so the
       // "query was filtered" hint below would be misleading — the LLM rewrite ran
       // N variants and simply found nothing (F9).
@@ -310,13 +314,17 @@ function formatSearchOutput(paginatedResults, args, ftsQuery, totalCount, orFall
 // ─── Tool: mem_search ───────────────────────────────────────────────────────
-server.registerTool(
-  'mem_search',
-  {
-    description: descriptionOf('mem_search'),
-    inputSchema: memSearchSchema,
-  },
-  safeHandler(async (args) => {
+// Exported for tests: runs the full mem_search pipeline against an explicit db
+// with an optional injected llm (deepSearch dependency). The MCP tool handler
+// bypasses this wrapper and calls runSearchPipeline with the module db and no llm.
+// NOTE: resolveProject() inside runSearchPipeline closes over the module-level `db`,
+// not the injected one. Tests that pass a project: arg via this seam will trigger
+// resolveProject() against the real (module) DB, not the test DB.
+export async function handleSearchForTest(db, args, { llm } = {}) {
+  return runSearchPipeline(db, args, { llm });
+}
+async function runSearchPipeline(db, args, { llm } = {}) {
     if (args.project) args = { ...args, project: resolveProject(args.project) };
     const limit = args.limit ?? 20;
     const offset = args.offset ?? 0;
@@ -338,46 +346,75 @@ server.registerTool(
     if (!bounds.ok) throw new Error(`Invalid date_${bounds.bad}: "${bounds.value}" (use ISO 8601 or YYYY-MM-DD)`);
     const { epochFrom, epochTo } = bounds;
+    // Resolve tri-state deep mode. MCP defaults to 'auto' (escalate on weak results)
+    // unless explicitly overridden via args.deep or CLAUDE_MEM_AUTO_DEEP env flag.
+    const deepMode = resolveDeepMode(args.deep, { surface: 'mcp' });
     // Early return when query was provided but sanitized to nothing (all FTS5
-    // keywords/special chars). Skipped for deep search — its LLM rewrite may
-    // still produce searchable variants from a query the FTS sanitizer rejects.
-    if (args.query && !ftsQuery && !epochFrom && !epochTo && !args.obs_type && !args.importance && !args.deep) {
-      return formatSearchOutput([], args, ftsQuery, 0);
+    // keywords/special chars). Skipped for deep/auto — deep's LLM rewrite may
+    // still produce searchable variants from a query the FTS sanitizer rejects,
+    // and auto could escalate similarly.
+    if (args.query && !ftsQuery && !epochFrom && !epochTo && !args.obs_type && !args.importance && deepMode === 'normal') {
+      return { ...formatSearchOutput([], args, ftsQuery, 0), escalated: false, results: [], total: 0, variants: null };
     }
     // When obs_type is specified, implicitly restrict to observations only.
-    // --deep is observations-only too (deepSearch fuses hybrid-obs lists).
-    const effectiveType = args.deep ? 'observations' : (searchType || (args.obs_type ? 'observations' : undefined));
+    // Explicit deep is observations-only too (deepSearch fuses hybrid-obs lists); auto-escalated deep is restricted later via isDeep.
+    const effectiveType = deepMode === 'deep' ? 'observations' : (searchType || (args.obs_type ? 'observations' : undefined));
     const isCrossSource = !effectiveType;
-    const ctx = { ftsQuery, searchType: effectiveType, args, epochFrom, epochTo, perSourceLimit, perSourceOffset, currentProject, limit };
+    const ctx = { db, ftsQuery, searchType: effectiveType, args, epochFrom, epochTo, perSourceLimit, perSourceOffset, currentProject, limit };
     const results = [];
     let deepVariants = null;
+    let isDeep = deepMode === 'deep';
+    let escalated = false;
+    let escalatedObsCount = 0;
+    // Helper: run deepSearch and load results into the shared `results` array.
+    const runDeepInto = async ({ auto = false } = {}) => {
+      const { results: deepRows, variants } = await deepSearch(db, {
+        query: args.query,
+        project: args.project || null,
+        type: args.obs_type || null,
+        importance: args.importance || null,
+        branch: args.branch || null,
+        includeNoise: args.include_noise === true,
+        epochFrom, epochTo,
+        limit: perSourceLimit,
+        currentProject,
+      }, llm ? { llm } : { auto });
+      // Safe to reset: sessions/prompts are pushed AFTER the obs block, so nothing is lost here.
+      results.length = 0;
+      results.push(...deepRows);
+      deepVariants = variants;
+    };
     if (!effectiveType || effectiveType === 'observations') {
-      if (args.deep) {
+      if (deepMode === 'deep') {
         // Opt-in LLM multi-query/HyDE deep search: rewrite → per-variant hybrid
         // search → RRF fusion, collapsing to the single query (== baseline) when
         // the rewrite yields nothing (deep-search.mjs). Over-fetch perSourceLimit
         // so the pagination slice below has room.
-        const { results: deepRows, variants } = await deepSearch(db, {
-          query: args.query,
-          project: args.project || null,
-          type: args.obs_type || null,
-          importance: args.importance || null,
-          branch: args.branch || null,
-          includeNoise: args.include_noise === true,
-          epochFrom, epochTo,
-          limit: perSourceLimit,
-          currentProject,
-        });
-        results.push(...deepRows);
-        deepVariants = variants;
+        await runDeepInto();
       } else {
         results.push(...searchObservations(ctx));
+        // Auto-escalate: if normal search is weak (too few results or OR fallback
+        // fired — a vocabulary-mismatch symptom), escalate to deep. ctx is mutated
+        // by searchObservations to set ctx.orFallbackFired when the AND→OR relaxation
+        // fires, so we read it here after the call.
+        // results is already obs-only here (sessions/prompts pushed below), but the
+        // filter makes the invariant explicit and robust to future reordering.
+        const obsCountBeforeEscalation = results.length;
+        if (deepMode === 'auto' && autoDeepLlmReady(process.env, llm) && shouldEscalateToDeep(results.filter(r => r.source === 'obs'), ctx) && hasEscalatableCorpus(db, args.project || null)) {
+          await runDeepInto({ auto: true });
+          isDeep = true;
+          escalated = true;
+          escalatedObsCount = obsCountBeforeEscalation;
+        }
       }
     }
-    if (!effectiveType || effectiveType === 'sessions')     results.push(...searchSessions(ctx));
-    if (!effectiveType || effectiveType === 'prompts')       results.push(...searchPrompts(ctx));
+    // Sessions and prompts are excluded when deep (obs-only invariant, #8735).
+    if ((!effectiveType || effectiveType === 'sessions') && !isDeep) results.push(...searchSessions(ctx));
+    if ((!effectiveType || effectiveType === 'prompts') && !isDeep)   results.push(...searchPrompts(ctx));
     // Type-list fallback: when obs_type is specified and FTS finds nothing,
     // list recent observations of that type (user likely wants to browse by type)
@@ -421,7 +458,7 @@ server.registerTool(
     // ftsQuery but the rewrite still returned rows (F2). reRankWithContext + the
     // re-sort are FTS-rank operations; deep rows are already RRF-ranked, so on the
     // empty-ftsQuery deep path we tag-but-don't-reorder (keep RRF order).
-    if ((ftsQuery || args.deep) && results.some(r => r.source === 'obs')) {
+    if ((ftsQuery || isDeep) && results.some(r => r.source === 'obs')) {
       const obsResults = results.filter(r => r.source === 'obs');
       if (ftsQuery) reRankWithContext(db, obsResults, currentProject);
       markSuperseded(obsResults);
@@ -445,11 +482,11 @@ server.registerTool(
     // results.length is NOT the population — count the real MATCH set instead. Clamp
     // to >= results.length so vector/concept-augmented obs rows are never undercounted.
     // (paired-path with mem-cli.mjs via shared countSearchTotal — #8217)
-    // For --deep the population is the fused variant set already in `results`
-    // (deep is obs-only, returned by deepSearch capped at perSourceLimit).
-    // countSearchTotal would count the ORIGINAL query's FTS matches instead —
-    // wrong, and ~0 on the vocabulary-mismatch queries deep exists for (F1).
-    const totalBeforePagination = args.deep
+    // For deep (explicit or auto-escalated), the population is the fused variant set
+    // already in `results` (deep is obs-only, returned by deepSearch capped at
+    // perSourceLimit). countSearchTotal would count the ORIGINAL query's FTS matches
+    // instead — wrong, and ~0 on the vocabulary-mismatch queries deep exists for (F1).
+    const totalBeforePagination = isDeep
       ? results.length
       : Math.max(countSearchTotal(db, {
         effectiveSource: effectiveType || null,
@@ -463,16 +500,32 @@ server.registerTool(
     // Always apply pagination — single-source results can exceed SQL LIMIT due to expansion (concept co-occurrence, PRF, vector search)
     const paginatedResults = (offset > 0 || results.length > limit) ? results.slice(offset, offset + limit) : results;
-    const output = formatSearchOutput(paginatedResults, args, ftsQuery, totalBeforePagination, ctx.orFallbackFired === true);
+    // Observability: announce auto-escalation on stderr (parity with CLI deep note).
+    if (escalated) process.stderr.write(`[mem] auto-escalated to deep search (weak results: ${escalatedObsCount} hits)\n`);
+    const output = formatSearchOutput(paginatedResults, args, ftsQuery, totalBeforePagination, ctx.orFallbackFired === true, isDeep);
     // Surface the rewrite to the calling agent (CLI prints this to stderr + JSON;
     // MCP had no signal at all — F13). Tells the agent whether deep actually
     // reformulated the query or collapsed to the single-query baseline.
-    if (args.deep && deepVariants && output.content?.[0]?.type === 'text') {
+    if (isDeep && deepVariants && output.content?.[0]?.type === 'text') {
       output.content[0].text += deepVariants.length > 1
         ? `\n\n[deep search: rewrote into ${deepVariants.length} variants — ${deepVariants.slice(1).map(v => JSON.stringify(v)).join(', ')}]`
         : '\n\n[deep search: rewrite produced no usable variants; searched the original query only (== baseline)]';
     }
-    return output;
+    // Return an object that exposes structured fields for tests + the MCP content blob.
+    return { ...output, results: paginatedResults, total: totalBeforePagination, escalated, variants: deepVariants };
+}
+server.registerTool(
+  'mem_search',
+  {
+    description: descriptionOf('mem_search'),
+    inputSchema: memSearchSchema,
+  },
+  safeHandler(async (args) => {
+    const result = await runSearchPipeline(db, args, {});
+    return { content: result.content };
   })
 );

package/tool-schemas.mjs CHANGED Viewed

@@ -93,7 +93,7 @@ export const memSearchSchema = {
   sort: z.enum(['relevance', 'time', 'importance']).optional().describe('Sort order: relevance (default, BM25), time (newest first), importance (highest first)'),
   include_noise: z.boolean().optional().describe('Include hook-llm fallback titles ("Modified X", "Worked on X", raw error logs) — hidden by default as they have ~3% access rate'),
   or: coerceBool.optional().describe('Force OR semantics between query terms from the start (default: AND with automatic OR-fallback when AND returns 0). Aligns with CLI --or.'),
-  deep: coerceBool.optional().describe('Opt-in LLM multi-query/HyDE deep search: one Haiku call rewrites the query into keyword/concept/HyDE variants, each runs the hybrid search, results RRF-fused. Observations-only; costs a Haiku call + seconds of latency. Use ONLY when a normal search missed because your wording differs from the stored terms (vocabulary mismatch). Default false; passive recall stays single-query.'),
+  deep: coerceBool.optional().describe('Tri-state LLM multi-query/HyDE deep search (observations-only). true=force; false=never; omit=AUTO (default ON for mem_search): a normal search that returns weak/few results auto-escalates with ONE Haiku call (query rewritten to keyword/concept/HyDE variants, RRF-fused). Set CLAUDE_MEM_AUTO_DEEP=0 to disable AUTO. Passive recall stays single-query.'),
 };
 export const memRecentSchema = {
@@ -350,7 +350,7 @@ export const tools = [
       '  - Investigating a concrete error keyword with obs_type="bugfix"\n' +
       '  - Looking for prior art on a module/feature before refactoring\n' +
       '  - User asks "have we seen this before" or references something not in visible context\n' +
-      '  - A normal search missed — set deep=true to LLM-rewrite the query (slower)\n' +
+      '  - A normal search missed — weak results auto-escalate to deep (set deep=false to opt out)\n' +
       '\n' +
       'Equivalent CLI: ' + CLI_INVOKE + ' search "<query>" [--type bugfix] [--deep]',
     inputSchema: memSearchSchema,