claude-mem-lite 3.2.0 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@
10
10
  "plugins": [
11
11
  {
12
12
  "name": "claude-mem-lite",
13
- "version": "3.2.0",
13
+ "version": "3.3.1",
14
14
  "source": "./",
15
15
  "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark)."
16
16
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "3.2.0",
3
+ "version": "3.3.1",
4
4
  "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
5
5
  "author": {
6
6
  "name": "sdsrss"
package/deep-search.mjs CHANGED
@@ -68,15 +68,24 @@ export function hasEscalatableCorpus(db, project, min = AUTO_DEEP_MIN_CORPUS) {
68
68
 
69
69
  /**
70
70
  * Is a usable LLM available for AUTO escalation? True when a stub/real llm is
71
- * injected (tests), or a FAST provider key is set. The claude-CLI fallback is
72
- * deliberately excluded spawning a subprocess per search is too slow for the
73
- * default (automatic) path; explicit deep=true may still use it.
71
+ * injected (tests), a FAST provider key is set, OR the claude-CLI fallback is
72
+ * enabled (D#40: default-on for CLI-auth users; kill switch
73
+ * CLAUDE_MEM_AUTO_DEEP_CLI=0). The CLI path is made safe for the long-lived
74
+ * server hot path by the async/fail-fast/throttled auto provider (deepSearch
75
+ * auto), not by being excluded as it was before D#40.
74
76
  * @param {object} [env=process.env]
75
77
  * @param {Function|undefined} [injectedLlm]
76
78
  * @returns {boolean}
77
79
  */
78
80
  export function autoDeepLlmReady(env = process.env, injectedLlm) {
79
- return !!injectedLlm || !!(env.ANTHROPIC_API_KEY || env.OPENROUTER_API_KEY);
81
+ if (injectedLlm) return true;
82
+ if (env.ANTHROPIC_API_KEY || env.OPENROUTER_API_KEY) return true;
83
+ // No provider key → detectMode() would be 'cli'. CLI-auth users get auto
84
+ // escalation by default; the burst/latency cost is bounded by the auto
85
+ // provider (fail-fast + throttle) and a failed rewrite degrades to baseline.
86
+ // Kill switch honors the common disable spellings, not just the exact '0'.
87
+ const off = String(env.CLAUDE_MEM_AUTO_DEEP_CLI ?? '').trim().toLowerCase();
88
+ return !(off === '0' || off === 'false' || off === 'no' || off === 'off');
80
89
  }
81
90
 
82
91
  /**
@@ -189,12 +198,75 @@ export function assembleVariants(query, parsed, { max = MAX_VARIANTS } = {}) {
189
198
  return out;
190
199
  }
191
200
 
192
- // Default provider: pulled in lazily so importing deep-search.mjs (e.g. in tests
193
- // with an injected llm) never loads the LLM client. callModelJSON returns parsed
194
- // JSON or null, and never throws.
201
+ // ─── Auto-escalation safety machinery (D#40) ─────────────────────────────────
202
+ // The AUTO path can fire on every weak search across the long-lived MCP server,
203
+ // so it must be fail-fast (short timeout, no retry), throttled (bound bursts),
204
+ // and cached (skip repeat rewrites). The EXPLICIT deep=true path stays patient.
205
+
206
export const AUTO_DEEP_TIMEOUT_MS = 5000; // fail-fast budget for the auto path; no retry
export const AUTO_DEEP_THROTTLE_MS = 3000; // min gap between auto LLM rewrites, per process (bounds spawn rate)
const REWRITE_CACHE_MAX = 256; // LRU cap for the query→variants cache

// Date.now() reading taken when the last auto-path LLM call actually fired (0 = never).
let _lastAutoLlmAt = 0;
// normalized query → variants (string[]); successes only. A Map iterates in
// insertion order, so "first key" is always the least recently used entry.
const _rewriteCache = new Map();

/** Reset auto-path throttle + cache. Test-only; production state is per-process. */
export function _resetAutoDeepState() {
  _lastAutoLlmAt = 0;
  _rewriteCache.clear();
}

/**
 * Look up cached variants for a normalized query.
 * @param {string} key normalized query
 * @returns {string[]|null} a private copy of the cached variants, or null on a miss
 */
function cacheGet(key) {
  if (!_rewriteCache.has(key)) return null;
  const variants = _rewriteCache.get(key);
  // LRU bump: re-inserting moves the key to the "most recent" end of the Map.
  _rewriteCache.delete(key);
  _rewriteCache.set(key, variants);
  return variants.slice(); // copy, so callers cannot mutate the cached array
}

/**
 * Store variants for a normalized query, evicting the least recently used entry
 * once the cache grows past REWRITE_CACHE_MAX.
 * @param {string} key normalized query
 * @param {string[]} variants rewrite result to remember (copied on the way in)
 */
function cacheSet(key, variants) {
  if (_rewriteCache.has(key)) _rewriteCache.delete(key);
  _rewriteCache.set(key, variants.slice());
  if (_rewriteCache.size > REWRITE_CACHE_MAX) {
    _rewriteCache.delete(_rewriteCache.keys().next().value); // evict oldest
  }
}

/**
 * Wrap an llm so it fires at most once per `intervalMs` per process. A throttled
 * call resolves null → rewriteQuery degrades to baseline (never worse). Exported
 * for tests. Throttle state is module-global (shared across deepSearch calls).
 *
 * The clock advances on every ACTUAL call — success OR failure — deliberately:
 * the throttle bounds the subprocess SPAWN RATE, and a failed spawn still costs a
 * subprocess + its timeout, so a broken provider that always fails must be rate-
 * limited too (gating only on success would let a persistent failure spawn on
 * every weak search). The interval is kept short so one failure suppresses
 * escalation only briefly, not for a long window.
 *
 * @param {(prompt: object) => Promise<object|null>} llm provider to rate-limit
 * @param {{intervalMs?: number}} [opts] minimum gap between real calls, in ms
 * @returns {(prompt: object) => Promise<object|null>} throttled provider
 */
export function makeThrottled(llm, { intervalMs = AUTO_DEEP_THROTTLE_MS } = {}) {
  return async (prompt) => {
    const now = Date.now();
    const elapsed = now - _lastAutoLlmAt;
    // Date.now() is wall-clock time and can step backwards (NTP correction,
    // manual clock change, VM resume). A negative gap would otherwise read as
    // "inside the interval" and silently suppress every rewrite until the clock
    // caught up with the stale timestamp; treat it as "not throttled" and
    // re-anchor the throttle on the new reading instead.
    if (elapsed >= 0 && elapsed < intervalMs) return null;
    _lastAutoLlmAt = now;
    return llm(prompt);
  };
}
250
+
251
/**
 * Issue a single query-rewrite request through the fully-async dispatcher
 * (callModelJSONAsync), so the CLI is never invoked on a blocking path from an
 * MCP request handler (D#40).
 *
 * The client module is imported lazily: callers that inject their own llm (for
 * example tests) never cause the LLM client to be loaded.
 *
 * @param {object} prompt rewrite prompt to send
 * @param {{timeout: number}} opts per-call timeout in milliseconds
 * @returns {Promise<object|null>} whatever callModelJSONAsync resolves to
 */
async function callRewriteLLM(prompt, { timeout }) {
  const { callModelJSONAsync: askModel } = await import('./haiku-client.mjs');
  const requestOptions = { timeout, maxTokens: 400 };
  return askModel(prompt, 'haiku', requestOptions);
}
260
+
261
/**
 * Provider used when the caller asked for deep search explicitly (deep=true):
 * a patient 12 s timeout, with no throttle and no cache wrapped around it.
 *
 * @param {object} prompt rewrite prompt
 * @returns {Promise<object|null>} result of the rewrite call
 */
async function defaultLLM(prompt) {
  const patientTimeoutMs = 12000;
  return callRewriteLLM(prompt, { timeout: patientTimeoutMs });
}
265
+
266
/**
 * Build the provider for the AUTO escalation path: the rewrite call with the
 * fail-fast AUTO_DEEP_TIMEOUT_MS budget, wrapped by makeThrottled. A new wrapper
 * is created on every call; the throttle clock behind makeThrottled is
 * module-global, so all wrappers share one rate limit per process.
 *
 * @returns {(prompt: object) => Promise<object|null>} throttled fail-fast provider
 */
function makeAutoLlm() {
  const failFastRewrite = (prompt) => callRewriteLLM(prompt, { timeout: AUTO_DEEP_TIMEOUT_MS });
  return makeThrottled(failFastRewrite);
}
199
271
 
200
272
  /**
@@ -205,11 +277,17 @@ async function defaultLLM(prompt) {
205
277
  * @param {object} [opts]
206
278
  * @param {(prompt: object) => Promise<object|null>} [opts.llm]
207
279
  * @param {number} [opts.retries=1]
280
+ * @param {boolean} [opts.cache=false] memoize successful rewrites (auto path)
208
281
  * @returns {Promise<string[]>}
209
282
  */
210
- export async function rewriteQuery(query, { llm = defaultLLM, retries = 1 } = {}) {
283
+ export async function rewriteQuery(query, { llm = defaultLLM, retries = 1, cache = false } = {}) {
211
284
  const original = String(query ?? '').trim();
212
285
  if (!original) return [];
286
+ const key = original.toLowerCase();
287
+ if (cache) {
288
+ const hit = cacheGet(key);
289
+ if (hit) return hit; // process-lifetime memo of a prior successful rewrite
290
+ }
213
291
  const prompt = buildRewritePrompt(original);
214
292
  for (let attempt = 0; attempt <= retries; attempt++) {
215
293
  let parsed;
@@ -219,7 +297,10 @@ export async function rewriteQuery(query, { llm = defaultLLM, retries = 1 } = {}
219
297
  parsed = null;
220
298
  }
221
299
  const variants = assembleVariants(original, parsed);
222
- if (variants.length > 1) return variants; // got at least one real rewrite
300
+ if (variants.length > 1) { // got at least one real rewrite
301
+ if (cache) cacheSet(key, variants); // cache successes only — failures retry next time
302
+ return variants;
303
+ }
223
304
  }
224
305
  return [original]; // robust floor — single-query == baseline
225
306
  }
@@ -304,13 +385,24 @@ function defaultSearchFn(db, query, params) {
304
385
  * @param {(prompt:object)=>Promise<object|null>} [deps.llm]
305
386
  * @param {(db:Database, query:string, params:object)=>Array} [deps.searchFn]
306
387
  * @param {number} [deps.rrfK=RRF_K]
388
+ * @param {boolean} [deps.auto=false] use the fail-fast/throttled/cached auto provider
307
389
  * @returns {Promise<{results: Array, variants: string[]}>}
308
390
  */
309
- export async function deepSearch(db, params, { llm = defaultLLM, searchFn = defaultSearchFn, rrfK = RRF_K } = {}) {
391
+ export async function deepSearch(db, params, { llm, searchFn = defaultSearchFn, rrfK = RRF_K, auto = false } = {}) {
310
392
  const query = String(params?.query ?? '').trim();
311
393
  if (!query) return { results: [], variants: [] };
312
394
 
313
- const variants = await rewriteQuery(query, { llm });
395
+ // No injected llm: EXPLICIT deep=true uses the patient defaultLLM; the AUTO
396
+ // path uses a fail-fast + throttled provider with no retry and a process-
397
+ // lifetime rewrite cache (D#40). An injected llm (tests) is used verbatim.
398
+ let rewriteLlm = llm;
399
+ let retries = 1;
400
+ let cache = false;
401
+ if (!rewriteLlm) {
402
+ if (auto) { rewriteLlm = makeAutoLlm(); retries = 0; cache = true; }
403
+ else rewriteLlm = defaultLLM;
404
+ }
405
+ const variants = await rewriteQuery(query, { llm: rewriteLlm, retries, cache });
314
406
  const lists = variants.map((v, i) => {
315
407
  // variant[0] is the ORIGINAL query: let an engine error propagate exactly as
316
408
  // it does on the single-query baseline path, so "never worse than baseline"
package/haiku-client.mjs CHANGED
@@ -5,13 +5,85 @@
5
5
  // Model configurable via CLAUDE_MEM_MODEL (haiku|sonnet); OpenRouter slug
6
6
  // overridable via OPENROUTER_MODEL
7
7
 
8
- import { execFileSync } from 'child_process';
8
+ import { execFileSync, spawn } from 'child_process';
9
+ import http from 'node:http';
10
+ import https from 'node:https';
11
+ import tls from 'node:tls';
9
12
  import { readFileSync } from 'fs';
10
13
  import { join } from 'path';
11
14
  import { randomUUID } from 'crypto';
12
15
  import { debugLog, debugCatch, parseJsonFromLLM } from './utils.mjs';
13
16
  import { DB_DIR } from './schema.mjs';
14
17
 
18
+ // ─── Proxy support (native fetch ignores HTTP(S)_PROXY) ──────────────────────
19
+ //
20
+ // Node's global fetch (undici) does NOT honour HTTP(S)_PROXY env vars, and
21
+ // undici's ProxyAgent isn't importable without adding a dependency. In an env
22
+ // that requires a local proxy to reach external APIs (e.g.
23
+ // HTTPS_PROXY=http://127.0.0.1:PORT), a direct fetch to openrouter.ai
24
+ // hangs/times out. We tunnel HTTPS through the HTTP CONNECT proxy using built-ins
25
+ // only. No proxy var (or a NO_PROXY host) → null → callers keep native fetch,
26
+ // unchanged (zero behaviour change when no proxy is configured).
27
/**
 * Decide which HTTP CONNECT proxy (if any) should carry a request to `targetUrl`.
 *
 * The proxy is read from HTTPS_PROXY / https_proxy / HTTP_PROXY / http_proxy, in
 * that order; only http:// and https:// proxy URLs are accepted. NO_PROXY /
 * no_proxy is a comma-separated bypass list, matched case-insensitively:
 *   - `*` bypasses the proxy for every host;
 *   - `example.com` and `.example.com` both match `example.com` itself and any
 *     subdomain (`api.example.com`), but never an unrelated host that merely
 *     ends with the same characters (`notexample.com`).
 *
 * @param {string} targetUrl absolute URL of the request about to be made
 * @returns {string|null} the proxy URL to tunnel through, or null to connect directly
 */
function httpConnectProxyFor(targetUrl) {
  const { env } = process;
  const proxy = env.HTTPS_PROXY || env.https_proxy || env.HTTP_PROXY || env.http_proxy;
  if (!proxy || !/^https?:\/\//.test(proxy)) return null; // socks5 ALL_PROXY not supported here

  let host;
  try {
    host = new URL(targetUrl).hostname.toLowerCase();
  } catch {
    return null; // unparsable target → leave the request to native fetch
  }

  const bypassed = (env.NO_PROXY || env.no_proxy || '')
    .split(',')
    .map((entry) => entry.trim().toLowerCase())
    .filter(Boolean)
    .some((entry) => {
      if (entry === '*') return true;
      const domain = entry.startsWith('.') ? entry.slice(1) : entry;
      if (!domain) return false;
      // Suffix matches must start on a label boundary: compare against
      // ".domain", not "domain", so "notexample.com" does not match "example.com".
      return host === domain || host.endsWith(`.${domain}`);
    });

  return bypassed ? null : proxy;
}
39
+
40
/**
 * POST `body` to an https `url` through an HTTP CONNECT tunnel, using Node
 * built-ins only. Resolves a fetch-compatible subset: { ok, status, json(), text() }.
 * Rejects on connect failure, non-200 CONNECT reply, timeout, socket error or a
 * response that ends prematurely, so the caller's try/catch degrades exactly as
 * it would for a failed fetch.
 *
 * Credentials embedded in the proxy URL (http://user:pass@host:port) are sent as
 * a Basic `Proxy-Authorization` header on the CONNECT request.
 *
 * NOTE(review): the CONNECT request is always sent with `http.request`, i.e. in
 * plaintext, and the proxy port defaults to 80. An https:// proxy URL therefore
 * does not get a TLS connection to the proxy itself — confirm whether that case
 * needs support.
 *
 * @param {string} proxy proxy URL
 * @param {string} url target https URL
 * @param {{headers?: object, body?: string, timeout?: number}} opts request
 *   headers, request body, and idle timeout (ms) applied to both the CONNECT
 *   request and the tunnelled request
 * @returns {Promise<{ok: boolean, status: number, json: () => any, text: () => string}>}
 */
function postViaConnectProxy(proxy, url, { headers = {}, body = '', timeout = 20000 }) {
  return new Promise((resolve, reject) => {
    const p = new URL(proxy);
    const t = new URL(url);
    const port = Number(t.port) || 443;
    const authority = `${t.hostname}:${port}`;

    // Several events can race to settle the promise (error, timeout, end, close);
    // only the first one wins.
    let settled = false;
    const finish = (fn, arg) => { if (!settled) { settled = true; fn(arg); } };

    const connectHeaders = { Host: authority };
    if (p.username || p.password) {
      // URL keeps userinfo percent-encoded; decode before building the Basic token.
      const credentials = `${decodeURIComponent(p.username)}:${decodeURIComponent(p.password)}`;
      connectHeaders['Proxy-Authorization'] = `Basic ${Buffer.from(credentials).toString('base64')}`;
    }

    const connReq = http.request({
      host: p.hostname,
      port: Number(p.port) || 80,
      method: 'CONNECT',
      path: authority,
      headers: connectHeaders,
    });
    connReq.setTimeout(timeout, () => connReq.destroy(new Error('proxy CONNECT timeout')));
    connReq.on('error', (e) => finish(reject, e));
    connReq.on('connect', (res, socket) => {
      if (res.statusCode !== 200) {
        socket.destroy();
        return finish(reject, new Error(`proxy CONNECT ${res.statusCode}`));
      }
      // Tunnel is up: run TLS over the proxied socket and send the real request.
      const req = https.request(
        url,
        { method: 'POST', headers, createConnection: () => tls.connect({ socket, servername: t.hostname }) },
        (resp) => {
          let data = '';
          resp.setEncoding('utf8');
          resp.on('data', (c) => (data += c));
          resp.on('end', () => finish(resolve, {
            ok: resp.statusCode >= 200 && resp.statusCode < 300,
            status: resp.statusCode,
            json: () => JSON.parse(data),
            text: () => data,
          }));
          // A response that errors or closes before 'end' would otherwise leave
          // the promise pending forever. 'close' also fires after a normal 'end',
          // where finish() is already settled and this is a no-op.
          resp.on('error', (e) => finish(reject, e));
          resp.on('close', () => finish(reject, new Error('proxy response closed before completion')));
        }
      );
      req.setTimeout(timeout, () => req.destroy(new Error('proxy request timeout')));
      req.on('error', (e) => finish(reject, e));
      req.end(body);
    });
    connReq.end();
  });
}
86
+
15
87
  // ─── Model Resolution ────────────────────────────────────────────────────────
16
88
 
17
89
  // CLI name → API model ID mapping
@@ -247,6 +319,44 @@ export async function callModelJSON(prompt, model = 'haiku', opts) {
247
319
  return parseJsonFromLLM(result.text);
248
320
  }
249
321
 
322
/**
 * Fully asynchronous, JSON-returning model call for the long-lived server hot
 * path (deep-search auto-escalation, D#40).
 *
 * Dispatch depends on detectMode():
 *   - 'cli'  → ask the CLI through the non-blocking callModelCLIAsync;
 *   - 'api'  → call callModelAPI, else callOpenRouterAPI for any other mode; if
 *     that throws or yields no text, fall back to the same non-blocking CLI call.
 * No path here goes through a blocking execFileSync-based call.
 *
 * An unknown `model` falls back to 'haiku'. A provider exception is logged with
 * debugCatch and treated as "no answer".
 *
 * @param {string|{system?:string,user:string}} prompt
 * @param {'haiku'|'sonnet'} model
 * @param {{timeout?:number,maxTokens?:number,temperature?:number}} [opts]
 * @returns {Promise<object|null>} parsed JSON, or null when the prompt is empty or no text came back
 */
export async function callModelJSONAsync(prompt, model = 'haiku', { timeout = 15000, maxTokens = 1000, temperature = DEFAULT_LLM_TEMPERATURE } = {}) {
  if (!prompt) return null;
  const tier = MODEL_MAP[model] ? model : 'haiku';
  const mode = detectMode();

  // Shared by cli mode and by the post-provider-failure fallback.
  const askCli = async () => {
    const cliReply = await callModelCLIAsync(prompt, tier, { timeout });
    return cliReply?.text ? parseJsonFromLLM(cliReply.text) : null;
  };

  if (mode === 'cli') return askCli();

  // Keyed provider first; a throw is logged and handled like an empty reply.
  const askProvider = mode === 'api' ? callModelAPI : callOpenRouterAPI;
  let providerReply = null;
  try {
    providerReply = await askProvider(prompt, tier, { timeout, maxTokens, temperature });
  } catch (e) {
    debugCatch(e, `callModelJSONAsync:${mode}:${tier}`);
  }

  return providerReply?.text ? parseJsonFromLLM(providerReply.text) : askCli();
}
359
+
250
360
  async function callModelAPI(prompt, model, { timeout, maxTokens, temperature = DEFAULT_LLM_TEMPERATURE }) {
251
361
  const apiKey = process.env.ANTHROPIC_API_KEY;
252
362
  if (!apiKey) return null;
@@ -319,6 +429,72 @@ function callModelCLI(prompt, model, { timeout }) {
319
429
  }
320
430
  }
321
431
 
432
/**
 * Non-blocking counterpart of callModelCLI, meant for the long-lived MCP server
 * hot path (deep-search auto-escalation, D#40), where blocking the event loop
 * for a whole subprocess lifetime is not acceptable.
 *
 * The prompt is written to the child's stdin rather than passed in argv, so the
 * untrusted query is not visible in process listings.
 *
 * The returned promise never rejects:
 *   - child closes with non-empty stdout → { text } (trimmed stdout);
 *   - spawn failure, child 'error', or empty stdout → null;
 *   - timeout → the child is SIGKILLed, nothing is retried, and the partial
 *     stdout is returned only if it is already a complete `{...}` JSON document.
 *
 * @param {string|{system?:string,user:string}} prompt
 * @param {'haiku'|'sonnet'} model unknown values fall back to 'haiku'
 * @param {{timeout:number}} opts SIGKILL after `timeout` ms; no retry.
 * @returns {Promise<{text:string}|null>}
 */
export function callModelCLIAsync(prompt, model, { timeout }) {
  return new Promise((resolve) => {
    const modelName = MODEL_MAP[model] ? model : 'haiku';
    const spawnOptions = {
      env: { ...process.env, CLAUDE_MEM_HOOK_RUNNING: '1' },
      cwd: '/tmp',
      stdio: ['pipe', 'pipe', 'pipe'],
    };

    let child;
    try {
      child = spawn(getClaudePath(), ['-p', '--model', modelName], spawnOptions);
    } catch (e) {
      debugCatch(e, `${model}-cli-async`);
      resolve(null);
      return;
    }

    let collected = '';
    let finished = false;

    // First caller wins; later events (close after kill, etc.) are ignored.
    const settle = (outcome) => {
      if (finished) return;
      finished = true;
      clearTimeout(timer);
      resolve(outcome);
    };

    // Fail-fast: kill the child, then keep its output only if it is whole JSON.
    const onTimeout = () => {
      try { child.kill('SIGKILL'); } catch { /* already gone */ }
      const partial = collected.trim();
      let salvaged = null;
      if (partial.startsWith('{') && partial.endsWith('}')) {
        try {
          JSON.parse(partial);
          salvaged = { text: partial };
        } catch { /* not complete JSON */ }
      }
      settle(salvaged);
    };
    const timer = setTimeout(onTimeout, timeout);

    child.stdout?.setEncoding('utf8'); // decode multi-byte UTF-8 (CJK) across chunk boundaries
    child.stdout?.on('data', (chunk) => { collected += chunk; });
    child.stderr?.on('data', () => {}); // drain stderr so a chatty child can't block on a full pipe
    child.on('error', (e) => {
      debugCatch(e, `${model}-cli-async`);
      settle(null);
    });
    child.on('close', () => {
      const output = collected.trim();
      settle(output ? { text: output } : null);
    });

    // EPIPE guard: the child may exit before we finish writing stdin.
    child.stdin?.on('error', () => {});
    try {
      child.stdin?.write(flattenForCLI(prompt));
      child.stdin?.end();
    } catch (e) {
      debugCatch(e, `${model}-cli-async:stdin`);
    }
  });
}
497
+
322
498
  // ─── API Mode ────────────────────────────────────────────────────────────────
323
499
 
324
500
  async function callHaikuAPI(prompt, { timeout, maxTokens, temperature = DEFAULT_LLM_TEMPERATURE }) {
@@ -389,17 +565,20 @@ async function callOpenRouterAPI(prompt, tier, { timeout, maxTokens, temperature
389
565
  if (system) messages.push({ role: 'system', content: system });
390
566
  messages.push({ role: 'user', content: user });
391
567
 
392
- const res = await fetch('https://openrouter.ai/api/v1/chat/completions', {
393
- method: 'POST',
394
- headers: {
395
- 'Content-Type': 'application/json',
396
- 'Authorization': `Bearer ${apiKey}`,
397
- // Optional OpenRouter attribution headers (ignored by the API if absent).
398
- 'X-Title': 'claude-mem-lite',
399
- },
400
- body: JSON.stringify({ model, max_tokens: maxTokens, temperature, messages }),
401
- signal: controller.signal,
402
- });
568
+ const url = 'https://openrouter.ai/api/v1/chat/completions';
569
+ const reqHeaders = {
570
+ 'Content-Type': 'application/json',
571
+ 'Authorization': `Bearer ${apiKey}`,
572
+ // Optional OpenRouter attribution headers (ignored by the API if absent).
573
+ 'X-Title': 'claude-mem-lite',
574
+ };
575
+ const reqBody = JSON.stringify({ model, max_tokens: maxTokens, temperature, messages });
576
+ // Native fetch ignores HTTP(S)_PROXY; when a proxy is configured, tunnel the
577
+ // request through it — a direct fetch to openrouter.ai times out behind one.
578
+ const proxy = httpConnectProxyFor(url);
579
+ const res = proxy
580
+ ? await postViaConnectProxy(proxy, url, { headers: reqHeaders, body: reqBody, timeout })
581
+ : await fetch(url, { method: 'POST', headers: reqHeaders, body: reqBody, signal: controller.signal });
403
582
 
404
583
  if (!res.ok) {
405
584
  debugLog('WARN', `${tier}-openrouter`, `HTTP ${res.status}`);
package/mem-cli.mjs CHANGED
@@ -182,7 +182,7 @@ async function cmdSearch(db, args, { llm } = {}) {
182
182
  orFallbackFired: false,
183
183
  };
184
184
 
185
- const runDeep = async () => {
185
+ const runDeep = async ({ auto = false } = {}) => {
186
186
  const ds = await deepSearch(db, {
187
187
  query,
188
188
  project: project || null,
@@ -194,7 +194,7 @@ async function cmdSearch(db, args, { llm } = {}) {
194
194
  epochTo: dateTo,
195
195
  limit: perSourceLimit,
196
196
  currentProject: project ? null : inferProject(),
197
- }, llm ? { llm } : undefined);
197
+ }, llm ? { llm } : { auto });
198
198
  deepVariants = ds.variants;
199
199
  if (deepVariants.length > 1) {
200
200
  process.stderr.write(`[mem] Deep search: rewrote into ${deepVariants.length} query variants, RRF-fused\n`);
@@ -212,7 +212,7 @@ async function cmdSearch(db, args, { llm } = {}) {
212
212
  if (obsCtx.orFallbackFired) orFallbackFired = true;
213
213
  if (deepMode === 'auto' && autoDeepLlmReady(process.env, llm) && shouldEscalateToDeep(obsResults, obsCtx) && hasEscalatableCorpus(db, project || null)) {
214
214
  process.stderr.write(`[mem] auto-escalated to deep search (weak results: ${obsResults.length} hits)\n`);
215
- obsResults = await runDeep();
215
+ obsResults = await runDeep({ auto: true });
216
216
  isDeep = true;
217
217
  }
218
218
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "3.2.0",
3
+ "version": "3.3.1",
4
4
  "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
5
5
  "type": "module",
6
6
  "packageManager": "npm@10.9.2",
package/server.mjs CHANGED
@@ -370,7 +370,7 @@ async function runSearchPipeline(db, args, { llm } = {}) {
370
370
  let escalatedObsCount = 0;
371
371
 
372
372
  // Helper: run deepSearch and load results into the shared `results` array.
373
- const runDeepInto = async () => {
373
+ const runDeepInto = async ({ auto = false } = {}) => {
374
374
  const { results: deepRows, variants } = await deepSearch(db, {
375
375
  query: args.query,
376
376
  project: args.project || null,
@@ -381,7 +381,7 @@ async function runSearchPipeline(db, args, { llm } = {}) {
381
381
  epochFrom, epochTo,
382
382
  limit: perSourceLimit,
383
383
  currentProject,
384
- }, llm ? { llm } : undefined);
384
+ }, llm ? { llm } : { auto });
385
385
  // Safe to reset: sessions/prompts are pushed AFTER the obs block, so nothing is lost here.
386
386
  results.length = 0;
387
387
  results.push(...deepRows);
@@ -405,7 +405,7 @@ async function runSearchPipeline(db, args, { llm } = {}) {
405
405
  // filter makes the invariant explicit and robust to future reordering.
406
406
  const obsCountBeforeEscalation = results.length;
407
407
  if (deepMode === 'auto' && autoDeepLlmReady(process.env, llm) && shouldEscalateToDeep(results.filter(r => r.source === 'obs'), ctx) && hasEscalatableCorpus(db, args.project || null)) {
408
- await runDeepInto();
408
+ await runDeepInto({ auto: true });
409
409
  isDeep = true;
410
410
  escalated = true;
411
411
  escalatedObsCount = obsCountBeforeEscalation;