npm - claude-mem-lite - Versions diffs - 2.84.2 → 2.86.0 - Mend

claude-mem-lite 2.84.2 → 2.86.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +2 -2
package/README.md +60 -3
package/README.zh-CN.md +8 -3
package/haiku-client.mjs +127 -22
package/hook-llm.mjs +3 -3
package/hook-memory.mjs +8 -4
package/hook-shared.mjs +18 -5
package/hook-update.mjs +32 -0
package/hook.mjs +19 -14
package/install.mjs +11 -0
package/lib/citation-tracker.mjs +124 -49
package/lib/cite-back-hint.mjs +38 -6
package/lib/tmp-fixture-sweep.mjs +69 -0
package/mem-cli.mjs +57 -4
package/package.json +3 -2
package/scripts/pre-tool-recall.js +11 -7
package/server.mjs +39 -0
package/source-files.mjs +3 -0
package/tool-schemas.mjs +2 -2

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -10,9 +10,9 @@
   "plugins": [
     {
       "name": "claude-mem-lite",
-      "version": "2.84.2",
+      "version": "2.86.0",
       "source": "./",
-      "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall"
+      "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. Alternative to claude-mem with 600x lower cost."
     }
   ]
 }

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "claude-mem-lite",
-  "version": "2.84.2",
-  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall",
+  "version": "2.86.0",
+  "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. Alternative to claude-mem with 600x lower cost.",
   "author": {
     "name": "sdsrss"
   },

package/README.md CHANGED Viewed

@@ -2,9 +2,13 @@
 # claude-mem-lite
-Lightweight persistent memory system for [Claude Code](https://docs.anthropic.com/en/docs/claude-code). Automatically captures coding observations, decisions, and bug fixes during sessions, then provides full-text search to recall them later.
+`claude-mem-lite` is a **persistent memory** (also called *long-term memory* or *cross-session context*) system for **[Claude Code](https://docs.anthropic.com/en/docs/claude-code)** — Anthropic's CLI coding agent. It runs as an **[MCP](https://modelcontextprotocol.io/) server** plus a set of Claude Code hooks, automatically capturing coding observations, decisions, and bug fixes during sessions, then providing hybrid full-text + semantic search to recall them later.
-Built as an [MCP server](https://modelcontextprotocol.io/) + Claude Code hooks. Zero external services, single SQLite database, minimal overhead.
+Compared to general-purpose LLM memory frameworks like [`mem0`](https://github.com/mem0ai/mem0) or the MCP reference [`memory`](https://github.com/modelcontextprotocol/servers/tree/main/src/memory) server, claude-mem-lite is purpose-built for Claude Code's hook lifecycle: episode batching cuts LLM calls 7–10× vs the original [claude-mem](https://github.com/thedotmack/claude-mem) (600× lower total cost), and the hybrid FTS5 + TF-IDF retriever benchmarks at 0.88 Recall@10 / 0.96 Precision@10.
+> 中文简介：claude-mem-lite 是 Claude Code 的轻量级**持久化记忆 / 长期记忆 / 跨会话上下文**插件，基于 MCP 协议 + 钩子机制，自动捕获编码会话中的决策、修复和上下文，并通过 FTS5 + TF-IDF 混合检索召回。详见 [中文 README](README.zh-CN.md)。
+Zero external services. Single SQLite database. Minimal overhead.
 ## Why claude-mem-lite?
@@ -50,6 +54,22 @@ For a typical 50-tool-call session:
 The original sends **everything to the LLM and hopes it filters well**. claude-mem-lite **filters first with code, then sends only what matters** to a smaller model. This is not a downgrade; it's a smarter architecture that produces equivalent search quality at a fraction of the cost.
+### Comparison: memory systems for AI coding agents
+How claude-mem-lite differs from the major neighbors in the LLM-memory space (verified May 2026):
+| | **claude-mem-lite** | [`mem0`](https://github.com/mem0ai/mem0) | MCP reference [`memory`](https://github.com/modelcontextprotocol/servers/tree/main/src/memory) | [claude-mem](https://github.com/thedotmack/claude-mem) (original) |
+|---|---|---|---|---|
+| **Target client** | Claude Code only | Any LLM app via SDK | Any MCP client | Claude Code only |
+| **Capture model** | Auto via hooks | Manual `memory.add()` | Manual tool calls (`create_entities`, `add_observations`) | Auto via hooks |
+| **Code-aware retrieval** | FTS5 + 100+ synonym pairs (incl. CJK↔EN) | General-purpose | Generic graph nodes | Code-aware |
+| **Search** | Hybrid: FTS5 BM25 + TF-IDF cosine via RRF | Hybrid: semantic + BM25 + entity linking | Knowledge-graph traversal | FTS5 + Chroma vector |
+| **Storage** | Single local SQLite | Pluggable; Qdrant or configurable vector store | Single JSONL file (knowledge graph) | SQLite + Chroma |
+| **LLM dependency** | Haiku per episode (5–10 ops batched) | LLM per add/search op | None (graph CRUD only) | Sonnet per tool call |
+| **Setup** | One command (`/plugin install` or `npx`) | SDK integration + vector store config | MCP install (per-client) | Bun + Python + Chroma |
+**When to pick which**: pick `mem0` if you need a memory layer for a non-Claude-Code app (your own agent, multiple LLM providers). Pick the MCP reference `memory` server if you specifically want a knowledge-graph data model and don't mind invoking memory tools by hand. Pick claude-mem-lite if you want zero-touch automatic capture purpose-built for Claude Code's hook lifecycle, with code-domain retrieval and no external services.
 ## Features
 - **Automatic capture** -- Hooks into Claude Code lifecycle (PostToolUse, SessionStart, Stop, UserPromptSubmit) to record observations without manual effort
@@ -85,7 +105,7 @@ The original sends **everything to the LLM and hopes it filters well**. claude-m
 - **Resource registry** -- Indexes installed skills and agents with FTS5 search, composite scoring, and invocation tracking; searchable via `mem_registry` MCP tool
 - **Unified resource discovery** -- Shared filesystem traversal layer (`resource-discovery.mjs`) used by both runtime scanner and offline indexer, supporting flat directories, plugin nesting, and loose `.md` files
 - **Domain synonym expansion** -- Registry search queries expand to domain synonyms (e.g., "fix" → debug, bugfix, troubleshoot, diagnose, repair)
-- **Dual LLM mode** -- Auto-detects `ANTHROPIC_API_KEY` for direct API calls; falls back to `claude -p` CLI when no key is available
+- **Multi-provider LLM mode** -- Provider priority `ANTHROPIC_API_KEY` (direct Anthropic API) → `OPENROUTER_API_KEY` (OpenRouter, OpenAI-compatible — point it at any model via `OPENROUTER_MODEL`) → `claude -p` CLI fallback when no key is set
 - **Lesson-learned indexing** -- `lesson_learned` field indexed in FTS5 with weight 8, making past debugging insights directly searchable
 - **Cross-source normalization** -- `mem_search` normalizes scores across observations, sessions, and prompts before merging, preventing any source from dominating results
 - **Exponential recency decay** -- Type-differentiated half-lives (decisions: 90d, discoveries: 60d, bugfixes: 14d, changes: 7d) consistently applied in all ranking paths
@@ -637,11 +657,48 @@ npm run benchmark:gate    # CI gate: fails if metrics regress beyond 5% toleranc
 |----------|-------------|---------|
 | `CLAUDE_MEM_DIR` | Custom data directory. All databases, runtime files, and managed resources are stored here. | `~/.claude-mem-lite/` |
 | `CLAUDE_MEM_MODEL` | LLM model for background calls (episode extraction, session summaries). Accepts `haiku` or `sonnet`. | `haiku` |
+| `ANTHROPIC_API_KEY` | Anthropic API key. When set, all background LLM calls go directly to the Anthropic Messages API (with prompt caching). Highest priority. | _(unset → CLI)_ |
+| `OPENROUTER_API_KEY` | OpenRouter API key (OpenAI-compatible). Used for background LLM calls when `ANTHROPIC_API_KEY` is **not** set. If neither key is set, calls fall back to the `claude -p` CLI. | _(unset)_ |
+| `OPENROUTER_MODEL` | Overrides the OpenRouter model slug for **all** background calls (e.g. `openai/gpt-4o-mini`, `qwen/qwen-2.5-72b-instruct`). When unset, the `CLAUDE_MEM_MODEL` tier maps to `anthropic/claude-haiku-4.5` (haiku) or `anthropic/claude-sonnet-4.5` (sonnet). | _(tier default)_ |
 | `CLAUDE_MEM_DEBUG` | Enable debug logging (`1` to enable). | _(disabled)_ |
 | `MEM_QUIET_HOOKS` | Low-noise hooks. `1` drops the `File Lessons` / `Key Context` sections from SessionStart injection, the lesson suffix from `[mem] Related memories`, and the `WHEN TO USE` / `Decision rules` blocks from MCP server instructions. IDs and the `Recent` table still surface so `mem_get(ids=[…])` remains reachable. Intended for users running the invited-memory adopt path or who otherwise want minimal auto-injection. **Since v2.82.0 this env no longer gates auto-adopt — use `MEM_NO_AUTO_ADOPT=1` for that.** | _(disabled)_ |
 | `MEM_NO_AUTO_ADOPT` | Global opt-out for auto-adopt (v2.82.0+). `1` prevents the first-SessionStart auto-write of the invited-memory sentinel across **all** projects. For per-project opt-out use `claude-mem-lite adopt --disable` instead (writes a durable `<memdir>/.mem-no-auto-adopt` sentinel that survives marker deletion). | _(disabled)_ |
 | `MEM_NO_ADOPT_HINT` | Silences the one-line "Invited-memory 未启用：`claude-mem-lite adopt`…" hint that SessionStart appends when the current project hasn't been adopted. Since v2.82.1 auto-adopt fires on first SessionStart for any install path, so this hint typically surfaces only when you've explicitly opted out (`MEM_NO_AUTO_ADOPT=1` or `claude-mem-lite adopt --disable`). | _(disabled)_ |
+## FAQ
+### What is a memory system for Claude Code?
+A memory system lets Claude Code remember context — coding decisions, bug fixes, file history — across sessions. By default Claude Code's context resets each session; claude-mem-lite persists observations to a local SQLite database and re-injects them at session start and on relevant prompts.
+### Does Claude Code have built-in long-term memory?
+No. Claude Code's `CLAUDE.md` and `MEMORY.md` files act as static instruction memory, but there is no native dynamic recall of past sessions, bug fixes, or decisions. claude-mem-lite adds that layer via MCP and hooks, with no manual note-taking required.
+### How is claude-mem-lite different from mem0 or MCP's reference memory server?
+`mem0` and the MCP `memory` server are general-purpose LLM memory frameworks designed for any client. claude-mem-lite is purpose-built for Claude Code's hook lifecycle: it captures *episodes* (batched tool calls), uses domain-specific synonym expansion for code terms (`K8s`, `DB`, `数据库`, ...), and surfaces past observations proactively before file edits via the `PreToolUse:Edit` hook.
+### Why "lite"? What did the original claude-mem do differently?
+The original called an LLM on every tool use with raw JSON inputs. claude-mem-lite batches 5–10 operations per LLM call, uses a smaller model (Haiku), and runs a deterministic code-level filter before sending anything to the model. Net result: ~600× lower cost with equivalent search quality. See the [Architecture comparison](#architecture-comparison) above.
+### Does this work cross-project? Cross-machine?
+Project-scoped by default — each project has its own memory namespace. Single-machine only (SQLite, not networked). Use `mem_export` (JSON / JSONL) to back up or migrate between machines.
+### What about privacy? Does it call external APIs?
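+Only the background LLM summarization step (Haiku by default) leaves your machine: it calls Anthropic's API when `ANTHROPIC_API_KEY` is set, OpenRouter when only `OPENROUTER_API_KEY` is set, or the local `claude -p` CLI if neither key is set. All search, storage, and retrieval is local SQLite — no telemetry, and no other external services.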
+### 中文常见问题
+**Claude Code 怎么跨会话记住内容？** 默认不能。claude-mem-lite 通过 MCP 协议和钩子自动把决策、bug 修复、文件历史持久化到本地 SQLite，下次会话开始时再注入。
+**和 mem0、官方 MCP memory server 有什么区别？** 那两个是通用 LLM 记忆框架；claude-mem-lite 是为 Claude Code 钩子生命周期定制的：批量 episode 处理、代码领域同义词扩展（K8s/DB/数据库等）、文件编辑前主动召回相关历史。
+**支持中文吗？** 完整支持。FTS5 + 中英文同义词扩展（100+ 对，含 CJK ↔ EN 跨语言映射），中文记忆也可用英文关键词召回，反之亦然。
 ## License
 MIT

package/README.zh-CN.md CHANGED Viewed

@@ -2,9 +2,11 @@
 # claude-mem-lite
-[Claude Code](https://docs.anthropic.com/en/docs/claude-code) 的轻量级持久化记忆系统。自动捕获编码过程中的观察、决策和问题修复，通过全文搜索随时回溯。
+`claude-mem-lite` 是 **[Claude Code](https://docs.anthropic.com/en/docs/claude-code)**（Anthropic 官方 CLI 编程代理）的 **持久化记忆系统**（也称 **长期记忆 / 跨会话上下文 / Claude Code 记忆插件**）。它以 **[MCP](https://modelcontextprotocol.io/) 服务器** + Claude Code 钩子（hooks）的形式运行，在编码会话中自动捕获观察记录、决策、bug 修复，并通过 FTS5 全文检索 + TF-IDF 向量的混合检索召回历史上下文。
-基于 [MCP 服务器](https://modelcontextprotocol.io/) + Claude Code 钩子构建。无需外部服务，单一 SQLite 数据库，开销极低。
+与 [`mem0`](https://github.com/mem0ai/mem0)、MCP 官方参考实现的 [`memory`](https://github.com/modelcontextprotocol/servers/tree/main/src/memory) 服务器等通用 LLM 记忆框架相比，claude-mem-lite 专为 Claude Code 的钩子生命周期定制：episode 批处理把 LLM 调用量相比原版 [claude-mem](https://github.com/thedotmack/claude-mem) 减少 7-10 倍（综合成本下降 600 倍），FTS5 + TF-IDF 混合检索在 30 个查询的基准上达到 **Recall@10 = 0.88 / Precision@10 = 0.96**。
+无需外部服务。单一 SQLite 数据库。开销极低。
 ## 为什么选择 claude-mem-lite？
@@ -84,7 +86,7 @@
 - **统一资源发现** -- 共享文件系统遍历层（`resource-discovery.mjs`），运行时扫描器和离线索引器共用，支持扁平目录、插件嵌套和松散 `.md` 文件
 - **领域同义词扩展** -- 注册表搜索查询自动扩展领域同义词（如 "修复" → fix, debug, bugfix, repair, error）
 - **持久化冷却机制** -- 5 分钟跨会话冷却 + 同会话去重，避免重复推荐 skill 自动加载
-- **双模式 LLM 调用** -- 自动检测 `ANTHROPIC_API_KEY` 直连 API；无 key 时回退到 `claude -p` CLI
+- **多 provider LLM 调用** -- provider 优先级 `ANTHROPIC_API_KEY`（直连 Anthropic API）→ `OPENROUTER_API_KEY`（OpenRouter，OpenAI 兼容，可用 `OPENROUTER_MODEL` 指向任意模型）→ 无 key 时回退 `claude -p` CLI
 - **Haiku 熔断器** -- 连续 3 次 LLM 失败后，禁用 Haiku 调度 5 分钟，防止级联延迟
 - **否定意图感知** -- 正确处理 "不要测试了，先修 bug" 等复杂提示，排除被否定的意图，支持中英文混合输入
 - **可配置 LLM 模型** -- 通过 `CLAUDE_MEM_MODEL` 环境变量在 Haiku（快速/低成本）和 Sonnet（深度分析）之间切换
@@ -597,6 +599,9 @@ npm run benchmark:gate    # CI 门控：指标回退超过 5% 容差时失败
 |------|------|--------|
 | `CLAUDE_MEM_DIR` | 自定义数据目录。所有数据库、运行时文件和托管资源均存储在此。 | `~/.claude-mem-lite/` |
 | `CLAUDE_MEM_MODEL` | 后台 LLM 调用模型（Episode 提取、会话总结、调度）。可选 `haiku` 或 `sonnet`。 | `haiku` |
+| `ANTHROPIC_API_KEY` | Anthropic API key。设置后所有后台 LLM 调用直连 Anthropic Messages API（带 prompt caching），优先级最高。 | _(未设 → CLI)_ |
+| `OPENROUTER_API_KEY` | OpenRouter API key（OpenAI 兼容）。当**未设** `ANTHROPIC_API_KEY` 时用于后台 LLM 调用；两者都未设则回退到 `claude -p` CLI。 | _(未设)_ |
+| `OPENROUTER_MODEL` | 覆盖**所有**后台调用的 OpenRouter 模型 slug（如 `openai/gpt-4o-mini`、`qwen/qwen-2.5-72b-instruct`）。未设时按 `CLAUDE_MEM_MODEL` 分层映射到 `anthropic/claude-haiku-4.5`（haiku）或 `anthropic/claude-sonnet-4.5`（sonnet）。 | _(分层默认)_ |
 | `CLAUDE_MEM_DEBUG` | 启用调试日志（设为 `1` 启用）。 | _(禁用)_ |
 | `MEM_QUIET_HOOKS` | 低噪声 hook。设为 `1` 时，SessionStart 注入去掉 `File Lessons` / `Key Context` 两节，`[mem] Related memories` 去掉 lesson 后缀，MCP server instructions 去掉 `WHEN TO USE` / `Decision rules` 两段。ID 与 `Recent` 表仍保留，`mem_get(ids=[…])` 可继续展开细节。适用于启用了 invited-memory adopt 流程或偏好最小化自动注入的用户。**v2.82.0 起此 env 不再阻挡 auto-adopt——如需关闭 auto-adopt 用 `MEM_NO_AUTO_ADOPT=1`。** | _(禁用)_ |
 | `MEM_NO_AUTO_ADOPT` | auto-adopt 全局关闭开关（v2.82.0+）。设为 `1` 阻止首次 SessionStart 在**所有**项目自动写入邀请式 memory 哨兵。项目级关闭走 `claude-mem-lite adopt --disable`（写 `<memdir>/.mem-no-auto-adopt` 哨兵，存活于 marker 删除）。 | _(禁用)_ |

package/haiku-client.mjs CHANGED Viewed

@@ -1,7 +1,9 @@
 // claude-mem-lite: Unified LLM call wrapper
 // Shared by memory (hook.mjs) and dispatch modules
-// Auto-detects API key for direct calls, falls back to claude CLI
-// Model configurable via CLAUDE_MEM_MODEL env var (default: haiku)
+// Provider priority: ANTHROPIC_API_KEY (direct Anthropic API) →
+// OPENROUTER_API_KEY (OpenRouter, OpenAI-compatible) → claude CLI fallback
+// Model configurable via CLAUDE_MEM_MODEL (haiku|sonnet); OpenRouter slug
+// overridable via OPENROUTER_MODEL
 import { execFileSync } from 'child_process';
 import { readFileSync } from 'fs';
@@ -30,18 +32,47 @@ export function resolveModel() {
   return { cli, api };
 }
+// OpenRouter uses its own slug namespace (OpenAI-compatible API). Map the
+// project's haiku/sonnet tiers to the matching anthropic/* slugs so the quality
+// tiering is preserved when routing through OpenRouter. Slugs verified against
+// openrouter.ai (2026-06): claude-haiku-4.5 / claude-sonnet-4.5 mirror the
+// native MODEL_MAP IDs above.
+const OPENROUTER_MODEL_MAP = {
+  haiku: 'anthropic/claude-haiku-4.5',
+  sonnet: 'anthropic/claude-sonnet-4.5',
+};
+/**
+ * Resolve the OpenRouter model slug for a given tier.
+ * OPENROUTER_MODEL (if set, non-blank) overrides every tier with an explicit
+ * slug — this is how users point claude-mem-lite at any OpenRouter model
+ * (e.g. openai/gpt-4o-mini, qwen/...). Otherwise the tier maps to its default
+ * anthropic/* slug, falling back to the haiku slug for unknown tiers.
+ * @param {string} tier 'haiku' | 'sonnet'
+ * @returns {string} OpenRouter model slug
+ */
+export function resolveOpenRouterModel(tier) {
+  const override = (process.env.OPENROUTER_MODEL || '').trim();
+  if (override) return override;
+  return OPENROUTER_MODEL_MAP[tier] || OPENROUTER_MODEL_MAP.haiku;
+}
 // ─── Mode Detection ──────────────────────────────────────────────────────────
 let _mode = null;
 /**
- * Detect whether to use direct API or CLI for LLM calls.
- * Cached after first call.
- * @returns {'api'|'cli'} The detected mode
+ * Detect which provider to use for LLM calls. Priority (per user contract):
+ * ANTHROPIC_API_KEY → direct Anthropic API ('api', native, supports prompt
+ * caching), else OPENROUTER_API_KEY → OpenRouter ('openrouter', OpenAI-compat),
+ * else fall back to the `claude` CLI ('cli'). Cached after first call.
+ * @returns {'api'|'openrouter'|'cli'} The detected mode
  */
 export function detectMode() {
   if (_mode) return _mode;
-  _mode = process.env.ANTHROPIC_API_KEY ? 'api' : 'cli';
+  if (process.env.ANTHROPIC_API_KEY) _mode = 'api';
+  else if (process.env.OPENROUTER_API_KEY) _mode = 'openrouter';
+  else _mode = 'cli';
   const { cli } = resolveModel();
   debugLog('DEBUG', 'haiku-client', `mode: ${_mode}, model: ${cli}`);
   return _mode;
@@ -102,8 +133,9 @@ export function flattenForCLI(input) {
 /**
  * Call Haiku model with a prompt. Returns parsed text or null on failure.
- * Uses direct API when ANTHROPIC_API_KEY is available, otherwise falls back to CLI.
- * Never throws — returns null on any error.
+ * Provider priority ANTHROPIC_API_KEY → OPENROUTER_API_KEY → CLI; if the keyed
+ * provider call fails (HTTP error / network throw / empty), degrades to the
+ * `claude -p` CLI. Never throws — returns null only when every path fails.
  *
  * @param {string|{system?: string, user: string}} prompt Prompt text, or split form
  * @param {object} [opts] Options
@@ -116,15 +148,28 @@ export async function callHaiku(prompt, { timeout = 10000, maxTokens = 500 } = {
   const mode = detectMode();
+  // CLI is terminal — no provider to fall back to.
+  if (mode === 'cli') {
+    try { return callHaikuCLI(prompt, { timeout }); }
+    catch (e) { debugCatch(e, 'callHaiku'); return null; }
+  }
+  // Keyed provider (api/openrouter): attempt it, then degrade to the CLI on any
+  // failure (HTTP error → null, or network/timeout throw). A region-blocked or
+  // out-of-credit key must not silently drop background summaries.
+  let primary = null;
   try {
-    if (mode === 'api') {
-      return await callHaikuAPI(prompt, { timeout, maxTokens });
-    }
-    return callHaikuCLI(prompt, { timeout });
+    primary = mode === 'api'
+      ? await callHaikuAPI(prompt, { timeout, maxTokens })
+      : await callOpenRouterAPI(prompt, resolveModel().cli, { timeout, maxTokens });
   } catch (e) {
-    debugCatch(e, 'callHaiku');
-    return null;
+    debugCatch(e, `callHaiku:${mode}`);
   }
+  if (primary) return primary;
+  debugLog('WARN', 'haiku-client', `${mode} call failed, falling back to claude CLI`);
+  try { return callHaikuCLI(prompt, { timeout }); }
+  catch (e) { debugCatch(e, 'callHaiku:cli-fallback'); return null; }
 }
 /**
@@ -143,8 +188,8 @@ export async function callHaikuJSON(prompt, opts) {
 /**
  * Call LLM with explicit model selection. Supports 'haiku' and 'sonnet'.
- * Reuses existing API/CLI dual-mode infrastructure.
- * Never throws — returns null on any error.
+ * Same provider priority + failure fallback to CLI as callHaiku.
+ * Never throws — returns null only when every path fails.
  *
  * @param {string} prompt The prompt text
  * @param {'haiku'|'sonnet'} model Model to use (default: 'haiku')
@@ -158,15 +203,27 @@ export async function callLLMWithModel(prompt, model = 'haiku', { timeout = 1500
   const resolvedModel = MODEL_MAP[model] ? model : 'haiku';
   const mode = detectMode();
+  // CLI is terminal — no provider to fall back to.
+  if (mode === 'cli') {
+    try { return callModelCLI(prompt, resolvedModel, { timeout }); }
+    catch (e) { debugCatch(e, `callLLMWithModel:${resolvedModel}`); return null; }
+  }
+  // Keyed provider (api/openrouter): attempt it, then degrade to the CLI on any
+  // failure so a region-blocked / out-of-credit key still produces output.
+  let primary = null;
   try {
-    if (mode === 'api') {
-      return await callModelAPI(prompt, resolvedModel, { timeout, maxTokens });
-    }
-    return callModelCLI(prompt, resolvedModel, { timeout });
+    primary = mode === 'api'
+      ? await callModelAPI(prompt, resolvedModel, { timeout, maxTokens })
+      : await callOpenRouterAPI(prompt, resolvedModel, { timeout, maxTokens });
   } catch (e) {
-    debugCatch(e, `callLLMWithModel:${resolvedModel}`);
-    return null;
+    debugCatch(e, `callLLMWithModel:${mode}:${resolvedModel}`);
   }
+  if (primary) return primary;
+  debugLog('WARN', 'haiku-client', `${mode} call failed, falling back to claude CLI (${resolvedModel})`);
+  try { return callModelCLI(prompt, resolvedModel, { timeout }); }
+  catch (e) { debugCatch(e, `callLLMWithModel:cli-fallback:${resolvedModel}`); return null; }
 }
 /**
@@ -299,6 +356,54 @@ async function callHaikuAPI(prompt, { timeout, maxTokens }) {
   }
 }
+// ─── OpenRouter Mode ─────────────────────────────────────────────────────────
+// OpenRouter exposes an OpenAI-compatible chat-completions API (NOT the
+// Anthropic Messages format), so the request/response shapes differ from
+// callHaikuAPI/callModelAPI: Bearer auth, `messages` with a system-role entry,
+// and the reply lives at choices[0].message.content. Anthropic's prompt-cache
+// `cache_control` field has no OpenAI-format equivalent and is omitted.
+// `tier` is the resolved model tier ('haiku'|'sonnet'); OPENROUTER_MODEL can
+// override the resulting slug entirely (see resolveOpenRouterModel).
+async function callOpenRouterAPI(prompt, tier, { timeout, maxTokens }) {
+  const apiKey = process.env.OPENROUTER_API_KEY;
+  if (!apiKey) return null;
+  const model = resolveOpenRouterModel(tier);
+  const controller = new AbortController();
+  const timer = setTimeout(() => controller.abort(), timeout);
+  try {
+    const { system, user } = splitPrompt(prompt);
+    const messages = [];
+    if (system) messages.push({ role: 'system', content: system });
+    messages.push({ role: 'user', content: user });
+    const res = await fetch('https://openrouter.ai/api/v1/chat/completions', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'Authorization': `Bearer ${apiKey}`,
+        // Optional OpenRouter attribution headers (ignored by the API if absent).
+        'X-Title': 'claude-mem-lite',
+      },
+      body: JSON.stringify({ model, max_tokens: maxTokens, messages }),
+      signal: controller.signal,
+    });
+    if (!res.ok) {
+      debugLog('WARN', `${tier}-openrouter`, `HTTP ${res.status}`);
+      return null;
+    }
+    const data = await res.json();
+    const text = data.choices?.[0]?.message?.content;
+    return text ? { text } : null;
+  } finally {
+    clearTimeout(timer);
+  }
+}
 // ─── CLI Mode ────────────────────────────────────────────────────────────────
 function callHaikuCLI(prompt, { timeout }) {

package/hook-llm.mjs CHANGED Viewed

@@ -674,7 +674,7 @@ ${actionList}`;
   if (gotSlot) {
     let raw, parsed;
     try {
-      raw = callLLM(prompt);
+      raw = await callLLM(prompt);
       parsed = parseJsonFromLLM(raw);
     } finally {
       releaseLLMSlot();
@@ -721,7 +721,7 @@ ${actionList}`;
         retryAttempted = true;
         try {
           const retryPrompt = buildLessonRetryPrompt(episode, parsed);
-          const retryRaw = callLLM(retryPrompt, 10000);
+          const retryRaw = await callLLM(retryPrompt, 10000);
           if (retryRaw) {
             const retry = parseJsonFromLLM(retryRaw);
             const retryLesson = typeof retry?.lesson === 'string' ? retry.lesson.trim() : '';
@@ -974,7 +974,7 @@ ${obsList}`;
     let raw, llmParsed;
     try {
-      raw = callLLM(prompt, 20000);
+      raw = await callLLM(prompt, 20000);
       llmParsed = parseJsonFromLLM(raw);
     } finally {
       releaseLLMSlot();

package/hook-memory.mjs CHANGED Viewed

@@ -34,14 +34,18 @@ function getCoverageThreshold() {
 }
 // v2.41: cross-project boost (applied to decisions/discoveries from other
-// projects). Default 0.7 = 30% penalty vs same-project hits — tuned for multi-
-// project installs where transferable insights are the minority of matches.
+// projects). Default 0.4 = 60% penalty vs same-project hits. Was 0.7 (30%), but
+// a cross-project audit found that a 30% discount let strongly-matching but
+// off-topic decisions still win injection slots in unrelated projects (e.g. an
+// FTS5 SQL gotcha surfacing in a UI session). Transferable insights are the
+// minority of cross-project matches, so the penalty should be steep; raise it
+// back via env for installs that want more sharing.
 // Env override `MEM_CROSS_PROJECT_BOOST` ∈ [0, 1]; clamped, invalid → default.
 function getCrossProjectBoost() {
   const raw = process.env.MEM_CROSS_PROJECT_BOOST;
-  if (raw === undefined || raw === '') return 0.7;
+  if (raw === undefined || raw === '') return 0.4;
   const n = parseFloat(raw);
-  return Number.isFinite(n) && n >= 0 && n <= 1 ? n : 0.7;
+  return Number.isFinite(n) && n >= 0 && n <= 1 ? n : 0.4;
 }
 function extractQueryTerms(text) {
   if (!text) return [];

package/hook-shared.mjs CHANGED Viewed

@@ -7,7 +7,7 @@ import { join } from 'path';
 import { existsSync, readFileSync, writeFileSync, mkdirSync, renameSync, readdirSync, statSync, unlinkSync } from 'fs';
 import { inferProject, debugCatch } from './utils.mjs';
 import { ensureDb, DB_DIR } from './schema.mjs';
-import { getClaudePath as getClaudePathShared, resolveModel as resolveModelShared, flattenForCLI as _flattenForCLI } from './haiku-client.mjs';
+import { getClaudePath as getClaudePathShared, resolveModel as resolveModelShared, flattenForCLI as _flattenForCLI, detectMode as detectLLMMode, callHaiku } from './haiku-client.mjs';
 // Phase D: invited-memory sentinel detection. memdir.mjs only pulls in fs/path/os/crypto;
 // adopt-content.mjs is pure strings. No circular deps — memdir doesn't import hook-shared.
 import { memdirPath as _memdirPath, isAdopted as _isAdopted } from './memdir.mjs';
@@ -130,13 +130,26 @@ export function openDb() {
   }
 }
-// ─── LLM via claude CLI ─────────────────────────────────────────────────────
+// ─── LLM (provider-routed: Anthropic API → OpenRouter → claude CLI) ─────────
 // Accepts either a plain string (legacy) or {system, user} (defense-in-depth
 // against prompt injection from poisoned user_prompts content — cso F#4 fix).
-// CLI mode renders the {system, user} form via flattenForCLI which inserts an
-// explicit data-boundary marker; API mode uses the system role natively.
-export function callLLM(prompt, timeoutMs = 15000) {
+// Provider priority mirrors haiku-client (ANTHROPIC_API_KEY > OPENROUTER_API_KEY
+// > CLI): when a key is present, delegate to callHaiku — it owns the Anthropic
+// Messages / OpenRouter chat-completions request shapes, uses the system role
+// natively, AND degrades to the `claude -p` CLI internally if the keyed provider
+// fails (so a region-blocked / out-of-credit key still yields a summary). The
+// keyless case shells out to `claude -p` directly here, where flattenForCLI
+// renders {system, user} with an explicit data-boundary marker. Returns the raw
+// response string (callers run parseJsonFromLLM themselves) or null.
+// maxTokens is sized for session-summary / episode JSON (larger than the
+// registry/optimize callers' budgets).
+export async function callLLM(prompt, timeoutMs = 15000) {
+  if (detectLLMMode() !== 'cli') {
+    const result = await callHaiku(prompt, { timeout: timeoutMs, maxTokens: 2000 });
+    return result?.text ?? null;
+  }
   const { cli: modelName } = resolveModelShared();
   try {
     const result = execFileSync(getClaudePathShared(), ['-p', '--model', modelName], {

package/hook-update.mjs CHANGED Viewed

@@ -101,6 +101,38 @@ export async function checkForUpdate(options = {}) {
   }
 }
+// ── Non-blocking SessionStart helpers (audit P3d) ──────────────────────────
+// Previously handleSessionStart `await checkForUpdate()` inline, blocking the
+// session up to ~3-6s on a GitHub fetch once per 24h. These two helpers split
+// that: emit the banner from CACHED state (zero network) and let the network
+// refresh run in a detached background worker, so SessionStart never blocks.
+// Banner string from cached update-state (≤24h stale), or null. No network I/O.
+export function getCachedUpdateBanner() {
+  try {
+    if (isDevMode() || process.env.CLAUDE_MEM_SKIP_UPDATE) return null;
+    const state = readState();
+    if (state.updateAvailable && state.latestVersion) {
+      // Cached "available" state only persists for deferred installs (plugin mode
+      // / allowInstall=false); a successful auto-install clears updateAvailable.
+      const hint = isPluginMode()
+        ? ' — plugin mode only checks for updates; reinstall/update the plugin to apply it'
+        : '';
+      return `\n📦 claude-mem-lite: v${state.latestVersion} available (current: v${state.installedVersion})${hint}\n`;
+    }
+    return null;
+  } catch { return null; }
+}
+// True when a network refresh is due (24h throttle) and updates aren't disabled.
+// Caller spawns the refresh in the background so this session doesn't wait.
+export function isUpdateCheckDue() {
+  try {
+    if (isDevMode() || process.env.CLAUDE_MEM_SKIP_UPDATE) return false;
+    return shouldCheck(readState());
+  } catch { return false; }
+}
 function isPluginMode() {
   return Boolean(process.env.CLAUDE_PLUGIN_ROOT);
 }

package/hook.mjs CHANGED Viewed

@@ -57,11 +57,11 @@ import { extractTailAssistantText, extractStructuredSummary } from './lib/summar
 import { searchRelevantMemories, formatMemoryLine } from './hook-memory.mjs';
 import { detectMemOverride } from './lib/mem-override.mjs';
 import { buildAndSaveHandoff, detectContinuationIntent, renderHandoffInjection, pickHandoffToInject, extractUnfinishedSummary } from './hook-handoff.mjs';
-import { checkForUpdate } from './hook-update.mjs';
+import { checkForUpdate, getCachedUpdateBanner, isUpdateCheckDue } from './hook-update.mjs';
 import { handleLLMOptimize } from './hook-optimize.mjs';
 import { silentAutoAdopt, hasAutoAdoptMarker } from './adopt-cli.mjs';
 import { emitV270UpgradeBanner } from './lib/upgrade-banner.mjs';
-import { loadCiteBackForEpisode, buildUnsavedBugfixHint, countUnsavedBugfixShape, buildCiteRecallNudge as libBuildCiteRecallNudge } from './lib/cite-back-hint.mjs';
+import { loadCiteBackForEpisode, buildUnsavedBugfixHint, countUnsavedBugfixShape, buildCiteRecallNudge as libBuildCiteRecallNudge, nextCiteLowStreak } from './lib/cite-back-hint.mjs';
 // plugin-cache-guard.mjs loaded dynamically — pre-2.31.2 installs that auto-upgraded
 // from an older hook-update.mjs SOURCE_FILES (which did not list this module) would
 // crash on static import. Degrade gracefully to no-op when the module is absent.
@@ -570,8 +570,13 @@ async function handleStop() {
             // alongside cite-recall. Same scan target (transcript already in OS
             // cache); same persistence file; one extra line in buildCiteRecallNudge.
             const bugfixStats = countUnsavedBugfixShape(transcriptPath);
-            const payload = { ...stats, ...bugfixStats, project, savedAt: Date.now() };
             const dest = join(RUNTIME_DIR, `cite-recall-${project.replace(/[^a-zA-Z0-9_.-]/g, '-').slice(0, 64)}.json`);
+            // Carry the consecutive-low-cite streak forward so the SessionStart
+            // nag can self-silence after the project has ignored it N times.
+            let priorStreak = 0;
+            try { priorStreak = JSON.parse(readFileSync(dest, 'utf8')).lowStreak || 0; } catch {}
+            const lowStreak = nextCiteLowStreak(priorStreak, stats);
+            const payload = { ...stats, ...bugfixStats, lowStreak, project, savedAt: Date.now() };
             writeFileSync(dest, JSON.stringify(payload), { mode: 0o600 });
           } catch (e) { debugCatch(e, 'handleStop-cite-recall-persist'); }
         }
@@ -1181,18 +1186,14 @@ async function handleSessionStart() {
     // Pre-load TF-IDF vocabulary cache for this session (from DB, ~1ms)
     try { getVocabulary(db); } catch (e) { debugCatch(e, 'session-start-vocab'); }
-    // Auto-update check (24h throttle, 3s timeout, silent on failure)
-    // Awaited so process.exit(0) doesn't kill the promise before notification
+    // Auto-update check (audit P3d): NON-BLOCKING. Emit the banner from cached
+    // state (zero network) and, if the 24h check is due, refresh in a detached
+    // background worker so SessionStart never blocks on a GitHub fetch (was an
+    // inline `await checkForUpdate()` that could stall the session 3-6s).
     try {
-      const updateResult = await checkForUpdate();
-      if (updateResult?.updated) {
-        process.stdout.write(`\n🔄 claude-mem-lite: v${updateResult.from} → v${updateResult.to} updated\n`);
-      } else if (updateResult?.updateAvailable) {
-        const hint = updateResult.installDeferred
-          ? ' — plugin mode only checks for updates; reinstall/update the plugin to apply it'
-          : '';
-        process.stdout.write(`\n📦 claude-mem-lite: v${updateResult.to} available (current: v${updateResult.from})${hint}\n`);
-      }
+      const banner = getCachedUpdateBanner();
+      if (banner) process.stdout.write(banner);
+      if (isUpdateCheckDue()) spawnBackground('update-check');
     } catch (e) { debugCatch(e, 'session-start-update'); }
   } finally {
@@ -1496,6 +1497,10 @@ try {
     case 'llm-summary':      await handleLLMSummary(); break;
     case 'auto-compress':    handleAutoCompress(); break;
     case 'llm-optimize':   await handleLLMOptimize(); break;
+    // Detached update refresh spawned by handleSessionStart (audit P3d) — does the
+    // GitHub fetch + (non-plugin) install off the SessionStart critical path,
+    // writing update-state.json so the NEXT session's cached banner is fresh.
+    case 'update-check':     await checkForUpdate(); break;
   }
 } catch (err) {
   // Always log fatal errors (ungated) with structured format

package/install.mjs CHANGED Viewed

@@ -30,6 +30,7 @@ import { RESOURCE_METADATA } from './install-metadata.mjs';
 import { scanPluginCacheHookPollution } from './plugin-cache-guard.mjs';
 import { SOURCE_FILES, HOOK_SCRIPT_FILES } from './source-files.mjs';
 import { probeBetterSqlite3Binding, ensureBetterSqlite3Working } from './lib/binding-probe.mjs';
+import { sweepStaleTestFixtures } from './lib/tmp-fixture-sweep.mjs';
 // Re-export for backward compatibility — tests/install-hook-scripts.test.mjs
 // and any external consumers still import HOOK_SCRIPT_FILES from install.mjs.
@@ -1753,6 +1754,16 @@ function cleanup() {
     }
   }
+  // Reap leaked test-fixture sandboxes from temp (mem-e2e-* / mem-audit-* / cite-*
+  // etc.) left by interrupted vitest runs — the §8.V4 disposal gap the audit found
+  // (~795MB). 24h age here (vs 1h in the test reaper) is conservative for a manual
+  // cleanup. Scans os.tmpdir() and the Claude Code temp root, depth-1, mem-prefixes
+  // only — never touches other tools' temp dirs.
+  const fixtureRoots = [tmpdir(), join(homedir(), '.claude', 'tmp')];
+  const swept = sweepStaleTestFixtures({ dirs: fixtureRoots, ageMs: 24 * 60 * 60 * 1000, dryRun });
+  for (const p of swept.names) ok(`${dryRun ? 'Would remove' : 'Removed'}: ${p}`);
+  removed += swept.removed;
   const verb = dryRun ? 'would be removed' : 'removed';
   console.log(`\n  ${removed === 0 ? 'No stale files found.' : `${removed} stale file(s) ${verb}.`}\n`);
 }

package/lib/citation-tracker.mjs CHANGED Viewed

@@ -151,26 +151,42 @@ export function bumpCitationAccess(db, ids, project) {
   return n;
 }
-// Matches a pre-tool-recall lesson line: `  #NN [type] body...`. Bounded type
-// list mirrors observations.type CHECK + the events table's allowed event_type
-// values pre-tool-recall.js can surface.
+// Matches a pre-tool-recall / error-recall lesson line: `  #NN [type] body...`.
+// Bounded type list mirrors observations.type CHECK + the events table's allowed
+// event_type values these surfaces can emit.
 const INJECTED_RE = /#(\d{1,7})\s+\[(bugfix|decision|change|discovery|feature|refactor|lesson)\]/g;
+// Record `raw` in `set` as an observation id, but only when it converts to a
+// positive integer below 1e7; anything else is silently dropped.
+function addObsId(set, raw) {
+  const id = Number(raw);
+  const inRange = id > 0 && id < 1e7;
+  if (!Number.isInteger(id) || !inRange) return;
+  set.add(id);
+}
+// Claude Code records a registered hook command (e.g. `node "${CLAUDE_PLUGIN_ROOT}/hook.mjs" user-prompt`)
+// VERBATIM with the path quote-wrapped: `node "/abs/hook.mjs" user-prompt`. A
+// naive `.includes('hook.mjs user-prompt')` then fails because the `"` sits
+// between the path and the subcommand — this was the bug that made the entire
+// UserPromptSubmit injection surface invisible to citation-decay in every real
+// install (tests only ever used unquoted commands, so it was never caught).
+// Strip shell quotes before substring-matching so command detection is robust to
+// plugin-cache vs symlinked-install AND quoted vs unquoted path forms.
+function normalizeHookCommand(command) {
+  // `command` comes from a JSON-parsed transcript entry, so it may be absent
+  // or not a string at all. Treat anything but a string as "no command":
+  // calling `.replace` on a truthy non-string would throw and, because the
+  // caller does not catch, abort extraction for the whole transcript.
+  if (typeof command !== 'string') return '';
+  // Drop both quote styles so `node "/abs/hook.mjs" user-prompt` and the
+  // unquoted form compare equal under substring matching.
+  return command.replace(/["']/g, '');
+}
 /**
- * Extract observation IDs injected by pre-tool-recall hook in this transcript.
- *
- * Tighter than `computeCiteRecall`'s over-inclusive "any #NN in non-assistant
- * text" — only counts IDs the agent actually saw from us, not user-pasted
- * references or unrelated #NN tokens in tool output.
+ * Walk every `hook_success` attachment in a transcript, invoking `fn` with the
+ * quote-normalized command and the injected text (JSON additionalContext
+ * unwrapped when present, else raw stdout). Shared by all injection extractors
+ * so command-matching + JSON-unwrap logic lives in exactly one place.
  *
  * @param {string|null|undefined} transcriptPath
- * @returns {Set<number>} unique injected IDs (empty set on missing path/file)
+ * @param {(ctx: {command: string, text: string}) => void} fn
  */
-export function extractInjectedFromPreToolUse(transcriptPath) {
-  const ids = new Set();
-  if (!transcriptPath || !existsSync(transcriptPath)) return ids;
+function eachHookAttachment(transcriptPath, fn) {
+  if (!transcriptPath || !existsSync(transcriptPath)) return;
   let raw;
-  try { raw = readFileSync(transcriptPath, 'utf8'); } catch { return ids; }
+  try { raw = readFileSync(transcriptPath, 'utf8'); } catch { return; }
   for (const line of raw.split('\n')) {
     if (!line.trim()) continue;
     let entry;
@@ -178,83 +194,140 @@ export function extractInjectedFromPreToolUse(transcriptPath) {
     if (entry.type !== 'attachment') continue;
     const att = entry.attachment;
     if (!att || att.type !== 'hook_success') continue;
-    if (!(att.command || '').includes('pre-tool-recall')) continue;
     const stdout = att.stdout || '';
     if (!stdout) continue;
-    // stdout is JSON wrapping additionalContext OR raw text (legacy);
-    // try JSON first and fall back to raw.
+    // stdout is JSON wrapping additionalContext OR raw text (triggerErrorRecall
+    // and the <memory-context> block write raw). Try JSON first, fall back to raw.
     let text = stdout;
     try {
       const parsed = JSON.parse(stdout);
       text = parsed?.hookSpecificOutput?.additionalContext || stdout;
     } catch {}
+    fn({ command: normalizeHookCommand(att.command), text });
+  }
+}
+/**
+ * Extract observation IDs injected by pre-tool-recall hook in this transcript.
+ *
+ * Tighter than `computeCiteRecall`'s over-inclusive "any #NN in non-assistant
+ * text" — only counts IDs the agent actually saw from us, not user-pasted
+ * references or unrelated #NN tokens in tool output.
+ *
+ * @param {string|null|undefined} transcriptPath
+ * @returns {Set<number>} unique injected IDs (empty set on missing path/file)
+ */
+export function extractInjectedFromPreToolUse(transcriptPath) {
+  const ids = new Set();
+  eachHookAttachment(transcriptPath, ({ command, text }) => {
+    if (!command.includes('pre-tool-recall')) return;
     INJECTED_RE.lastIndex = 0;
     let m;
-    while ((m = INJECTED_RE.exec(text))) {
-      const id = Number(m[1]);
-      if (Number.isInteger(id) && id > 0 && id < 1e7) ids.add(id);
-    }
-  }
+    while ((m = INJECTED_RE.exec(text))) addObsId(ids, m[1]);
+  });
   return ids;
 }
 // v34.x: UserPromptSubmit injection extractor. hook.mjs handleUserPrompt emits
 // formatMemoryLine `- [type] title | Lesson: X (#NN)[ [verify-before-use]]`,
-// which INJECTED_RE (anchored on `#NN [type]`) never matched — leaving the
-// highest-volume injection surface invisible to applyCitationDecay. The two
-// extractors are disjoint by design: PTR has `[type]` AFTER `#NN`, UPS has
-// `(#NN)` at end-of-line.
+// which INJECTED_RE (anchored on `#NN [type]`) never matched — leaving this
+// injection surface invisible to applyCitationDecay. The extractors are disjoint
+// by design: PTR has `[type]` AFTER `#NN`, UPS has `(#NN)` at end-of-line.
 //
 // Line-scan with `- [` prefix gate so a lesson body containing a back-reference
 // like "see (#999)" doesn't pollute the injected set (would streak-uncite an
 // obs we never actually displayed as a top-level entry).
 const UPS_LINE_PREFIX = '- [';
 const UPS_ID_RE = /\(#(\d{1,7})\)/g;
+// Quote-normalized (see normalizeHookCommand): real recorded command is
+// `node "/abs/hook.mjs" user-prompt` → normalized to `node /abs/hook.mjs user-prompt`.
 const UPS_COMMAND_SUFFIX = 'hook.mjs user-prompt';
 /**
  * Extract observation IDs injected by the UserPromptSubmit `<memory-context>`
  * block (hook.mjs handleUserPrompt). Disjoint from pre-tool-recall extraction —
- * the Stop handler unions both via extractAllInjected.
+ * the Stop handler unions all surfaces via extractAllInjected.
  *
  * @param {string|null|undefined} transcriptPath
  * @returns {Set<number>}
  */
 export function extractInjectedFromUserPromptSubmit(transcriptPath) {
   const ids = new Set();
-  if (!transcriptPath || !existsSync(transcriptPath)) return ids;
-  let raw;
-  try { raw = readFileSync(transcriptPath, 'utf8'); } catch { return ids; }
-  for (const line of raw.split('\n')) {
-    if (!line.trim()) continue;
-    let entry;
-    try { entry = JSON.parse(line); } catch { continue; }
-    if (entry.type !== 'attachment') continue;
-    const att = entry.attachment;
-    if (!att || att.type !== 'hook_success') continue;
-    // Suffix match — survives plugin-cache vs symlinked-install path differences.
-    if (!(att.command || '').includes(UPS_COMMAND_SUFFIX)) continue;
-    const stdout = att.stdout || '';
-    if (!stdout.includes('<memory-context')) continue;
-    for (const memLine of stdout.split('\n')) {
+  eachHookAttachment(transcriptPath, ({ command, text }) => {
+    if (!command.includes(UPS_COMMAND_SUFFIX)) return;
+    if (!text.includes('<memory-context')) return;
+    for (const memLine of text.split('\n')) {
       if (!memLine.startsWith(UPS_LINE_PREFIX)) continue;
       // Take the LAST (#NN) on the line — formatMemoryLine puts the obs id
       // in trailing parens, possibly followed by ` [verify-before-use]`. Any
-      // earlier (#NN) refs are inside title/lesson text (per the test that
-      // pins "see (#999)" → NOT extracted).
+      // earlier (#NN) refs are inside title/lesson text.
       const matches = [...memLine.matchAll(UPS_ID_RE)];
       if (matches.length === 0) continue;
-      const id = Number(matches[matches.length - 1][1]);
-      if (Number.isInteger(id) && id > 0 && id < 1e7) ids.add(id);
+      addObsId(ids, matches[matches.length - 1][1]);
     }
-  }
+  });
+  return ids;
+}
+/**
+ * Extract observation IDs injected by the PostToolUse error-recall hint
+ * (hook.mjs triggerErrorRecall → `[claude-mem-lite] Related memories found for
+ * this error:` followed by `  #NN [type] title` lines, delivered via
+ * post-tool-use.sh). This is a high-volume surface that NO extractor matched
+ * before — error-recall'd obs accrued injection_count but never reached
+ * applyCitationDecay, so they could neither promote nor demote.
+ *
+ * @param {string|null|undefined} transcriptPath
+ * @returns {Set<number>}
+ */
+export function extractInjectedFromErrorRecall(transcriptPath) {
+  const ids = new Set();
+  eachHookAttachment(transcriptPath, ({ command, text }) => {
+    const isErrorRecall = command.includes('post-tool-use')
+      && text.includes('Related memories found for this error');
+    if (!isErrorRecall) return;
+    // INJECTED_RE is a shared /g regex, so rewind it before each scan. It
+    // requires `#NN [type]`, so the trailing
+    // `→ Use mem_get(ids=[7933,8455])` line (bare numbers) is not matched.
+    INJECTED_RE.lastIndex = 0;
+    for (let hit = INJECTED_RE.exec(text); hit; hit = INJECTED_RE.exec(text)) {
+      addObsId(ids, hit[1]);
+    }
+  });
+  return ids;
+}
+// user-prompt-search.js formatResults emits `[mem] FYI — Related memories ...`
+// then one `#NN <icon> title` row per obs (raw stdout, line-leading id). Distinct
+// from the `<memory-context>` block (hook.mjs) — the two UPS injectors dedup obs
+// by id at inject time, so they carry DISJOINT obs sets; both must be extracted
+// or the FYI-carried (highest-importance keyContext) obs never reach decay.
+const FYI_HEADER = '[mem] FYI — Related memories';
+// Anchored at line start so `P#NN` past-question rows (user_prompts, different id
+// space) and any `#NN` inside lesson text are NOT matched.
+const FYI_LINE_ID_RE = /^#(\d{1,7})\s/;
+/**
+ * Extract observation IDs injected by the user-prompt-search.js `[mem] FYI —
+ * Related memories` block.
+ *
+ * @param {string|null|undefined} transcriptPath
+ * @returns {Set<number>}
+ */
+export function extractInjectedFromFyi(transcriptPath) {
+  const ids = new Set();
+  eachHookAttachment(transcriptPath, ({ command, text }) => {
+    // Gate on the user-prompt-search command and the FYI header together.
+    const isFyiBlock = command.includes('user-prompt-search') && text.includes(FYI_HEADER);
+    if (!isFyiBlock) return;
+    // One candidate id per row; the regex is anchored at line start, so only
+    // rows that begin with `#NN ` contribute.
+    text.split('\n').forEach((row) => {
+      const hit = row.match(FYI_LINE_ID_RE);
+      if (hit) addObsId(ids, hit[1]);
+    });
+  });
   return ids;
 }
 /**
- * Union of pre-tool-recall + UserPromptSubmit injection IDs for a transcript.
- * Single integration point the Stop handler calls — keeps hook.mjs's wiring
- * a one-liner and gives the contract test something to assert against.
+ * Union of every injection surface's IDs for a transcript: pre-tool-recall +
+ * UserPromptSubmit `<memory-context>` + PostToolUse error-recall + the
+ * user-prompt-search FYI block. Single integration point the Stop handler calls.
  *
  * @param {string|null|undefined} transcriptPath
  * @returns {Set<number>}
@@ -263,6 +336,8 @@ export function extractAllInjected(transcriptPath) {
   return new Set([
     ...extractInjectedFromPreToolUse(transcriptPath),
     ...extractInjectedFromUserPromptSubmit(transcriptPath),
+    ...extractInjectedFromErrorRecall(transcriptPath),
+    ...extractInjectedFromFyi(transcriptPath),
   ]);
 }

package/lib/cite-back-hint.mjs CHANGED Viewed

@@ -172,10 +172,40 @@ export function countUnsavedBugfixShape(transcriptPath) {
 //     the bugfix-shape heuristic already requires ≥3 entries)
 // Either gate can fire independently. Both off → empty string (no surface).
 //
+// Self-silence: after this many consecutive qualifying sessions where the
+// project's cite-recall stayed below threshold, stop emitting the ratio nag — a
+// project that has ignored the cite-#NN ask N times running is not going to
+// start because we asked again; further nags are pure context noise (the audit's
+// "nag-at-0%-compliance" anti-pattern). The streak resets the moment cite-recall
+// recovers, so the nudge re-engages if behavior changes. Env override:
+// CLAUDE_MEM_CITE_NUDGE_SILENCE_AFTER (0 = never silence).
+export const CITE_NUDGE_SILENCE_AFTER = 3;
+// True iff this session's stats satisfy the ratio-nag gate (low cite-recall with
+// enough injection volume to judge). Shared by buildCiteRecallNudge (decide to
+// nag) and nextCiteLowStreak (decide to keep silencing).
+function ratioGateFires(data, env) {
+  // Unset, non-numeric, or zero env values fall back to the defaults.
+  const threshold = Number(env.CLAUDE_MEM_CITE_NUDGE_THRESHOLD) || 0.6;
+  const minInjected = Number(env.CLAUDE_MEM_CITE_NUDGE_MIN_INJECTED) || 5;
+  const injected = data?.injected;
+  const ratio = data?.ratio;
+  // Both stats must be numbers before the gate can be judged.
+  if (typeof injected !== 'number' || typeof ratio !== 'number') return false;
+  // Too little injection volume: no verdict.
+  if (!(injected >= minInjected)) return false;
+  return ratio < threshold;
+}
+// Next consecutive-low-cite streak: increment when the ratio gate fires this
+// session, reset to 0 otherwise (recovery, or too few injections to judge).
+export function nextCiteLowStreak(priorStreak, stats, env = process.env) {
+  // Gate did not fire this session: the streak is broken.
+  if (!ratioGateFires(stats, env)) return 0;
+  // A missing or non-finite prior streak counts as 0 before incrementing.
+  const base = Number.isFinite(priorStreak) ? priorStreak : 0;
+  return base + 1;
+}
 // Env opt-outs:
 //   • CLAUDE_MEM_NO_CITE_NUDGE=1 — disables BOTH gates (full silence)
 //   • CLAUDE_MEM_CITE_NUDGE_THRESHOLD — ratio gate threshold (default 0.6)
 //   • CLAUDE_MEM_CITE_NUDGE_MIN_INJECTED — ratio gate min-volume (default 5)
+//   • CLAUDE_MEM_CITE_NUDGE_SILENCE_AFTER — consecutive-low streak before the
+//     ratio nag self-silences (default 3; 0 = never silence)
 export function buildCiteRecallNudge(project, runtimeDir, env = process.env) {
   if (env.CLAUDE_MEM_NO_CITE_NUDGE === '1') return '';
   try {
@@ -183,13 +213,15 @@ export function buildCiteRecallNudge(project, runtimeDir, env = process.env) {
     const path = join(runtimeDir, `cite-recall-${safe}.json`);
     const raw = readFileSync(path, 'utf8');
     const data = JSON.parse(raw);
-    const threshold = Number(env.CLAUDE_MEM_CITE_NUDGE_THRESHOLD) || 0.6;
-    const minInjected = Number(env.CLAUDE_MEM_CITE_NUDGE_MIN_INJECTED) || 5;
+    const silenceAfter = env.CLAUDE_MEM_CITE_NUDGE_SILENCE_AFTER !== undefined
+      ? Number(env.CLAUDE_MEM_CITE_NUDGE_SILENCE_AFTER)
+      : CITE_NUDGE_SILENCE_AFTER;
+    // silenceAfter > 0 AND the project has ignored the nag that many times running.
+    const silenced = silenceAfter > 0
+      && typeof data.lowStreak === 'number'
+      && data.lowStreak >= silenceAfter;
     const lines = [];
-    if (typeof data.injected === 'number'
-      && typeof data.ratio === 'number'
-      && data.injected >= minInjected
-      && data.ratio < threshold) {
+    if (!silenced && ratioGateFires(data, env)) {
       const pct = Math.round(data.ratio * 100);
       lines.push(`[mem] Last session cite-recall ${pct}% (${data.recalled}/${data.injected}) — when injected lessons (#NN lines) inform your action, cite #NN explicitly so the contract loop stays observable.`);
     }

package/lib/tmp-fixture-sweep.mjs ADDED Viewed

@@ -0,0 +1,69 @@
+// Sweep stale claude-mem-lite test-fixture directories from temp dirs.
+//
+// Tests create sandboxes via mkdtempSync(join(tmpdir(), '<prefix>')) and clean
+// them in afterEach — but an interrupted or SIGKILL'd vitest run never reaches
+// afterEach, leaking the dir (and its DBs) forever. The cross-project audit
+// found ~795MB of such residue (mem-e2e-* / mem-audit-* dominating). Per-test
+// cleanup cannot survive SIGKILL, so we ALSO reap at the next run's start
+// (globalSetup) and via `node install.mjs cleanup`.
+//
+// Safety: depth-1 only (no recursion — §8 forbids deep traversal of ~/.claude),
+// age-gated so a concurrently-running suite isn't disturbed, and restricted to a
+// conservative allowlist of clearly mem-namespaced prefixes so we never delete
+// another tool's temp dirs (e.g. code-graph-mcp's `.tmp*`/`index.db`).
+import { readdirSync, statSync, rmSync } from 'fs';
+import { join } from 'path';
+import { tmpdir } from 'os';
+// Prefixes used by THIS repo's mkdtempSync fixtures. Deliberately only the
+// clearly mem-namespaced ones — generic prefixes some tests use (plans-/tasks-/
+// metrics-/drift-/projects-/git-fixture-) are EXCLUDED to avoid collateral
+// deletion of unrelated /tmp dirs.
+export const TEST_FIXTURE_PREFIXES = [
+  'mem-', 'cite-', 'memdir-', 'adopt-', 'citation-test-',
+  'text-floor-', 'unsaved-bugfix-', 'hook-telemetry-', 'hook-latency-',
+  'quiet-hooks-', 'silent-adopt-', 'sweep-orphan-', 'pre-recall-sandbox-',
+  'err-sampler-', 'cml-preflight-', 'cli-audit-',
+];
+export const DEFAULT_FIXTURE_AGE_MS = 60 * 60 * 1000; // 1h — wide margin over the longest test
+// True when `name` begins with any entry of TEST_FIXTURE_PREFIXES.
+function isFixtureName(name) {
+  for (const prefix of TEST_FIXTURE_PREFIXES) {
+    if (name.startsWith(prefix)) return true;
+  }
+  return false;
+}
+/**
+ * Remove (or, with dryRun, list) stale test-fixture directories.
+ *
+ * @param {object} [opts]
+ * @param {string[]} [opts.dirs] temp roots to scan, depth-1 (default [os.tmpdir()])
+ * @param {number} [opts.ageMs] only act on entries older than this (default 1h)
+ * @param {boolean} [opts.dryRun] when true, list but do not delete
+ * @param {number} [opts.now] injectable clock for tests
+ * @returns {{removed: number, names: string[]}} absolute paths removed (or that would be)
+ */
+export function sweepStaleTestFixtures({ dirs, ageMs = DEFAULT_FIXTURE_AGE_MS, dryRun = false, now = Date.now() } = {}) {
+  const roots = (dirs && dirs.length) ? dirs : [tmpdir()];
+  const cutoff = now - ageMs;
+  const names = [];
+  const seen = new Set();
+  for (const root of roots) {
+    // Skip empty entries and roots already scanned in this call.
+    if (!root || seen.has(root)) continue;
+    seen.add(root);
+    let entries;
+    // Dirent entries report the entry's OWN type. A fixture-prefixed symlink
+    // that merely points at a directory is therefore skipped, instead of being
+    // aged by its target's mtime and removed (which statSync alone would allow).
+    try { entries = readdirSync(root, { withFileTypes: true }); } catch { continue; }
+    for (const entry of entries) {
+      if (!entry.isDirectory() || !isFixtureName(entry.name)) continue;
+      const full = join(root, entry.name);
+      try {
+        if (statSync(full).mtimeMs >= cutoff) continue; // too fresh — may be an in-flight run
+        if (!dryRun) rmSync(full, { recursive: true, force: true });
+        names.push(full);
+      } catch { /* concurrent unlink / permission — ignore */ }
+    }
+  }
+  return { removed: names.length, names };
+}

package/mem-cli.mjs CHANGED Viewed

@@ -1869,7 +1869,7 @@ function cmdMaintain(db, args) {
   const { positional, flags } = parseArgs(args);
   const action = positional[0];
   if (!action || !['scan', 'execute'].includes(action)) {
-    fail("[mem] Usage: claude-mem-lite maintain <scan|execute> [--ops cleanup,decay,boost,dedup,purge_stale,rebuild_vectors] [--project P] [--retain-days N] [--merge-ids keepId:removeId,...] — 'scan' previews, 'execute' applies.");
+    fail("[mem] Usage: claude-mem-lite maintain <scan|execute> [--ops cleanup,decay,boost,demote_pinned,dedup,purge_stale,rebuild_vectors,vacuum] [--project P] [--retain-days N] [--merge-ids keepId:removeId,...] — 'scan' previews, 'execute' applies.");
     return;
   }
@@ -1879,6 +1879,10 @@ function cmdMaintain(db, args) {
   const STALE_AGE_MS = 30 * 86400000;
   const SCAN_LIMIT = 500;
   const SIMILARITY_THRESHOLD = 0.7;
+  // demote_pinned threshold: a memory injected this many times with zero
+  // citations is "pinned noise" the regular `decay` op can't touch (decay
+  // protects injection_count > 0). 8 aligns with the noise-penalty tier-2 cut.
+  const PINNED_INJ_THRESHOLD = 8;
   if (action === 'scan') {
     const staleAge = Date.now() - STALE_AGE_MS;
@@ -1915,7 +1919,10 @@ function cmdMaintain(db, args) {
         COALESCE(SUM(CASE WHEN (title IS NULL OR title = '') AND (narrative IS NULL OR narrative = '')
                  THEN 1 ELSE 0 END), 0) as broken,
         COALESCE(SUM(CASE WHEN COALESCE(access_count, 0) > 3 AND COALESCE(importance, 1) < 3
-                 THEN 1 ELSE 0 END), 0) as boostable
+                 THEN 1 ELSE 0 END), 0) as boostable,
+        COALESCE(SUM(CASE WHEN COALESCE(injection_count, 0) >= ${PINNED_INJ_THRESHOLD}
+                      AND COALESCE(cited_count, 0) = 0 AND COALESCE(importance, 1) > 1
+                 THEN 1 ELSE 0 END), 0) as pinned
       FROM observations
       WHERE COALESCE(compressed_into, 0) = 0 ${projectFilter}
     `).get(staleAge, ...baseParams);
@@ -1930,6 +1937,7 @@ function cmdMaintain(db, args) {
     out(`  Stale (>30d, imp=1, no access): ${stats.stale}`);
     out(`  Broken (no title/narrative): ${stats.broken}`);
     out(`  Boostable (accessed>3, imp<3): ${stats.boostable}`);
+    out(`  Pinned-but-uncited (inj>=${PINNED_INJ_THRESHOLD}, cited=0, imp>1): ${stats.pinned} — run: maintain execute --ops demote_pinned`);
     out(`  Pending purge: ${pendingPurge.count} (compressed originals awaiting cleanup)`);
     if (duplicates.length > 0) {
       const AUTO_MERGE_THRESHOLD = 0.85;
@@ -1962,7 +1970,7 @@ function cmdMaintain(db, args) {
   }
   // Execute
-  const VALID_OPS = ['cleanup', 'decay', 'boost', 'dedup', 'purge_stale', 'rebuild_vectors'];
+  const VALID_OPS = ['cleanup', 'decay', 'boost', 'demote_pinned', 'dedup', 'purge_stale', 'rebuild_vectors', 'vacuum'];
   const opsStr = flags.ops || 'cleanup,decay,boost';
   const ops = opsStr.split(',').map(s => s.trim());
   const invalidOps = ops.filter(op => !VALID_OPS.includes(op));
@@ -2024,6 +2032,31 @@ function cmdMaintain(db, args) {
       results.push(`Decayed ${decayed.changes} stale observations, marked ${idleMarked.changes} idle as pending-purge${decayCap}`);
     }
+    if (ops.includes('demote_pinned')) {
+      // Repair the citation-decay blind spot: the `decay` op above PROTECTS
+      // injection_count > 0 rows, so a memory injected many times but never
+      // cited stays pinned at max importance and keeps dominating injection
+      // forever (the entrenched-noise pool the extractor bug let accumulate).
+      // Target the inverse signal — heavy injection, zero citations — and drop
+      // importance to 1 in a SINGLE pass. Injection priority is binary
+      // (importance >= 2 → full weight; hook-memory.mjs), so a gentle 3→2 step
+      // would leave the obs dominating injection just the same; only reaching 1
+      // actually de-ranks it. Floors at 1 (not 0/purge) so a later boost (access)
+      // or a genuine cite can still rescue a useful entry.
+      const demoted = db.prepare(`
+        UPDATE observations SET importance = 1
+        WHERE id IN (
+          SELECT id FROM observations
+          WHERE COALESCE(compressed_into, 0) = 0
+            AND COALESCE(injection_count, 0) >= ${PINNED_INJ_THRESHOLD}
+            AND COALESCE(cited_count, 0) = 0
+            AND COALESCE(importance, 1) > 1
+            ${projectFilter} LIMIT ${OP_CAP}
+        )
+      `).run(...baseParams);
+      results.push(`Demoted ${demoted.changes} pinned-but-uncited observations to importance 1 (inj>=${PINNED_INJ_THRESHOLD}, cited=0)${capHint(demoted.changes)}`);
+    }
     if (ops.includes('boost')) {
       const boosted = db.prepare(`
         UPDATE observations SET importance = MIN(3, COALESCE(importance, 1) + 1)
@@ -2137,6 +2170,24 @@ function cmdMaintain(db, args) {
     }
   }
+  // vacuum: reclaim freelist pages left behind by DELETEs (purge_stale / cleanup
+  // / dedup). DELETE only grows the freelist; the file never shrinks without
+  // VACUUM, which is absent everywhere else (auto_vacuum=0). Must run OUTSIDE any
+  // transaction. Whole-DB regardless of --project. Reports freelist before/after
+  // as the §7 reclaim metric.
+  if (ops.includes('vacuum')) {
+    try {
+      const pageSize = db.pragma('page_size', { simple: true });
+      const freeBefore = db.pragma('freelist_count', { simple: true });
+      db.exec('VACUUM');
+      const freeAfter = db.pragma('freelist_count', { simple: true });
+      const reclaimedMB = ((Math.max(0, freeBefore - freeAfter) * pageSize) / 1048576).toFixed(1);
+      results.push(`VACUUM: reclaimed ~${reclaimedMB}MB (freelist ${freeBefore} → ${freeAfter} pages)`);
+    } catch (e) {
+      results.push(`VACUUM failed — ${e.message}`);
+    }
+  }
   out(`[mem] ${results.join('\n[mem] ')}`);
 }
@@ -2575,10 +2626,12 @@ Commands:
     --project P         Filter by project
   maintain <scan|execute>  Memory maintenance
-    --ops O             Comma-separated: cleanup,decay,boost,dedup,purge_stale,rebuild_vectors
+    --ops O             Comma-separated: cleanup,decay,boost,demote_pinned,dedup,purge_stale,rebuild_vectors,vacuum
     --merge-ids K:R,... For dedup: keepId:removeId pairs (e.g. 10:11,20:21:22)
     --project P         Filter by project
     --retain-days N     For purge_stale: keep last N days (default 30)
+                        demote_pinned: importance→1 for inj>=8 & cited=0 (clears pinned noise)
+                        vacuum: reclaim freelist dead space (whole-DB, ignores --project)
   optimize              LLM-powered memory optimization (preview by default)
     --run               Execute (default: preview gates)

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "claude-mem-lite",
-  "version": "2.84.2",
-  "description": "Lightweight persistent memory system for Claude Code",
+  "version": "2.86.0",
+  "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. Alternative to claude-mem with 600x lower cost.",
   "type": "module",
   "packageManager": "npm@10.9.2",
   "engines": {
@@ -58,6 +58,7 @@
     "lib/private-strip.mjs",
     "lib/citation-tracker.mjs",
     "lib/cite-back-hint.mjs",
+    "lib/tmp-fixture-sweep.mjs",
     "lib/summary-extractor.mjs",
     "lib/id-routing.mjs",
     "lib/err-sampler.mjs",

package/scripts/pre-tool-recall.js CHANGED Viewed

@@ -329,14 +329,18 @@ try {
           lines.push(`  #${r.id} [${r.type}] ${title}`);
         }
       }
-    } else if (!isRead) {
-      // R-4: Edit/Write empty → short backfill reminder. Two goals: (1) Claude
-      // sees that the system actually ran, (2) Claude is nudged to save a lesson
-      // after a non-obvious bug. Reminder is one line to keep per-Edit cost low.
+    } else if (!isRead && process.env.CLAUDE_MEM_PRETOOL_NUDGE === '1') {
+      // R-4: Edit/Write empty → short backfill reminder. OPT-IN (default off) as
+      // of the cross-project audit: this "no prior lessons, remember to /lesson"
+      // reminder fired on ~70% of Edit/Write recalls and drove zero observed
+      // /lesson calls — pure context noise, mostly on brand-new files that by
+      // definition can't have a lesson. Save-nudging now lives at Stop time
+      // (buildCiteRecallNudge's unsaved-bugfix line + the cite-back hint), which
+      // has the full episode to judge whether a real fix happened. Set
+      // CLAUDE_MEM_PRETOOL_NUDGE=1 to restore the per-Edit reminder.
       //
-      // v2.34.6: Read does NOT emit this nudge. Read is passive — the agent
-      // isn't necessarily about to solve anything, so /lesson prompts are noise.
-      // Empty Reads exit silently, saving ~60 tokens × (every empty-file Read).
+      // Read never emitted this (passive). The cooldown write below still runs on
+      // every branch, so Read→Edit dedup + cite-back lessonId tracking are intact.
       lines.push(`[mem] PreToolUse recall — system-injected context, continue your planned action:`);
       lines.push(`[mem] No prior lessons for ${fname} — if you solve a non-obvious bug here, run: /lesson --file ${fname} "<root cause + fix>"`);
     }

package/server.mjs CHANGED Viewed

@@ -1484,6 +1484,29 @@ server.registerTool(
           results.push(`Boosted ${boosted.changes} frequently-accessed observations` + (boosted.changes >= OP_ROW_CAP ? ' (cap reached, re-run for more)' : ''));
         }
+        if (ops.includes('demote_pinned')) {
+          // CLI-parity (cmdMaintain): repair the citation-decay blind spot. The
+          // `decay` op protects injection_count > 0, so a memory injected many
+          // times but never cited stays pinned at max importance and keeps
+          // dominating injection. Target heavy-injection + zero-citation and
+          // drop importance to 1 in one pass — injection priority is binary
+          // (importance>=2), so a 3→2 step would not de-rank it. Floor 1 (not
+          // purge). PINNED_INJ_THRESHOLD=8.
+          const demoted = db.prepare(`
+            UPDATE observations SET importance = 1
+            WHERE id IN (
+              SELECT id FROM observations
+              WHERE COALESCE(compressed_into, 0) = 0
+                AND COALESCE(injection_count, 0) >= 8
+                AND COALESCE(cited_count, 0) = 0
+                AND COALESCE(importance, 1) > 1
+                ${projectFilter}
+              LIMIT ${OP_ROW_CAP}
+            )
+          `).run(...baseParams);
+          results.push(`Demoted ${demoted.changes} pinned-but-uncited observations to importance 1 (inj>=8, cited=0)` + (demoted.changes >= OP_ROW_CAP ? ' (cap reached, re-run for more)' : ''));
+        }
         if (ops.includes('dedup') && args.merge_ids) {
           let totalMerged = 0;
           const mergeStmt = db.prepare('UPDATE observations SET compressed_into = ? WHERE id = ? AND COALESCE(compressed_into, 0) = 0');
@@ -1557,6 +1580,22 @@ server.registerTool(
         }
       }
+      // vacuum: reclaim freelist dead space left by DELETEs. CLI-parity
+      // (cmdMaintain). Must run OUTSIDE any transaction; whole-DB.
+      if (ops.includes('vacuum')) {
+        try {
+          const pageSize = db.pragma('page_size', { simple: true });
+          const freeBefore = db.pragma('freelist_count', { simple: true });
+          db.exec('VACUUM');
+          const freeAfter = db.pragma('freelist_count', { simple: true });
+          const reclaimedMB = ((Math.max(0, freeBefore - freeAfter) * pageSize) / 1048576).toFixed(1);
+          results.push(`VACUUM: reclaimed ~${reclaimedMB}MB (freelist ${freeBefore} → ${freeAfter} pages)`);
+        } catch (e) {
+          debugCatch(e, 'vacuum');
+          results.push(`VACUUM failed — ${e.message}`);
+        }
+      }
       return { content: [{ type: 'text', text: results.join('\n') }] };
     }

package/source-files.mjs CHANGED Viewed

@@ -41,6 +41,9 @@ export const SOURCE_FILES = [
   'lib/private-strip.mjs',
   'lib/citation-tracker.mjs',
   'lib/cite-back-hint.mjs',
+  // v2.85: stale test-fixture sweeper. Imported by install.mjs (cleanup) + cli.mjs.
+  // Missing from the manifest → the tarball ships an install.mjs that throws ERR_MODULE_NOT_FOUND on cleanup.
+  'lib/tmp-fixture-sweep.mjs',
   'lib/summary-extractor.mjs',
   'lib/id-routing.mjs',
   'lib/err-sampler.mjs',

package/tool-schemas.mjs CHANGED Viewed

@@ -203,8 +203,8 @@ export const memOptimizeSchema = {
 export const memMaintainSchema = {
   action: z.enum(['scan', 'execute']).describe('scan=analyze candidates, execute=apply changes'),
-  operations: z.array(z.enum(['dedup', 'decay', 'cleanup', 'boost', 'purge_stale', 'rebuild_vectors'])).optional()
-    .describe('Operations: dedup=find/merge duplicate observations, decay=reduce importance of old low-value obs, cleanup=remove orphaned records, boost=promote frequently-accessed obs, purge_stale=DELETE pending-purge obs older than retain_days (requires confirm=true; first call previews), rebuild_vectors=rebuild TF-IDF vocabulary and all observation vectors'),
+  operations: z.array(z.enum(['dedup', 'decay', 'cleanup', 'boost', 'demote_pinned', 'purge_stale', 'rebuild_vectors', 'vacuum'])).optional()
+    .describe('Operations: dedup=find/merge duplicate observations, decay=reduce importance of old low-value obs, cleanup=remove orphaned records, boost=promote frequently-accessed obs, demote_pinned=importance→1 for obs injected>=8 times but never cited (clears pinned noise the decay op cannot reach), purge_stale=DELETE pending-purge obs older than retain_days (requires confirm=true; first call previews), rebuild_vectors=rebuild TF-IDF vocabulary and all observation vectors, vacuum=reclaim freelist dead space (whole-DB)'),
   merge_ids: z.preprocess(
     (v) => Array.isArray(v) ? v.map(g => Array.isArray(g) ? g.map(x => typeof x === 'string' ? parseInt(x, 10) : x) : g) : v,
     z.array(z.array(z.number().int()).min(2))