npm - claude-mem-lite - Versions diffs - 2.17.1 → 2.19.0 - Mend

claude-mem-lite 2.17.1 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/.claude-plugin/marketplace.json +1 -1
package/.claude-plugin/plugin.json +1 -1
package/README.md +27 -3
package/README.zh-CN.md +16 -2
package/bash-utils.mjs +109 -0
package/format-utils.mjs +71 -0
package/hash-utils.mjs +77 -0
package/hook-llm.mjs +53 -35
package/hook-memory.mjs +12 -14
package/hook-update.mjs +2 -0
package/install.mjs +2 -0
package/mem-cli.mjs +13 -23
package/package.json +7 -1
package/project-utils.mjs +37 -0
package/registry-retriever.mjs +6 -56
package/schema.mjs +73 -16
package/scripts/user-prompt-search.js +13 -72
package/secret-scrub.mjs +57 -0
package/server-internals.mjs +33 -24
package/server.mjs +148 -37
package/synonyms.mjs +211 -0
package/tool-schemas.mjs +25 -1
package/utils.mjs +16 -690

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -10,7 +10,7 @@
   "plugins": [
     {
       "name": "claude-mem-lite",
-      "version": "2.17.1",
+      "version": "2.19.0",
       "source": "./",
       "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall"
     }

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-mem-lite",
-  "version": "2.17.1",
+  "version": "2.19.0",
   "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall",
   "author": {
     "name": "sdsrss"

package/README.md CHANGED Viewed

@@ -100,6 +100,12 @@ The original sends **everything to the LLM and hopes it filters well**. claude-m
 - **LLM concurrency control** -- File-based semaphore limits background workers to 2 concurrent LLM calls, preventing resource contention
 - **stdin overflow protection** -- Hook input truncated at 256KB with regex-based action salvage for oversized tool outputs
 - **Cross-session handoff** -- Captures session state (request, completed work, next steps, key files) on `/clear` or `/exit`, then injects context when the next session detects continuation intent via explicit keywords or FTS5 term overlap
+- **In-place observation updates** -- `mem_update` tool modifies existing observations atomically (field update + FTS text rebuild + vector re-computation in one transaction), preserving original IDs and references
+- **Bulk export** -- `mem_export` tool exports observations as JSON or JSONL, with project/type/date filtering and 1000-row pagination cap with batch guidance
+- **FTS integrity management** -- `mem_fts_check` tool verifies FTS5 index health or rebuilds indexes on demand, useful after database recovery or when search results seem wrong
+- **Atomic multi-table writes** -- `saveObservation` wraps observations + observation_files + observation_vectors INSERTs in a single `db.transaction()`, preventing orphaned rows on crash
+- **Modular NLP pipeline** -- Synonym maps, stop words, scoring constants, and query building extracted into focused modules (`synonyms.mjs`, `stop-words.mjs`, `scoring-sql.mjs`, `nlp.mjs`) for independent testing and maintenance
+- **Porter-aligned PRF** -- Pseudo-relevance feedback terms are now stemmed with the same Porter algorithm used by FTS5, ensuring PRF expansion terms match the search index
 ## Platform Support
@@ -148,7 +154,7 @@ Source files stay in the cloned repo. Update via `git pull && node install.mjs i
 ### What happens during installation
 1. **Install dependencies** -- `npm install --omit=dev` (compiles native `better-sqlite3`)
-2. **Register MCP server** -- `mem` server with 9 tools (search, timeline, get, save, stats, delete, compress, maintain, registry)
+2. **Register MCP server** -- `mem` server with 12 tools (search, timeline, get, save, update, stats, delete, compress, maintain, registry, export, fts_check)
 3. **Configure hooks** -- `PostToolUse`, `SessionStart`, `Stop`, `UserPromptSubmit` lifecycle hooks
 4. **Create data directory** -- `~/.claude-mem-lite/` (hidden) for database, runtime, and managed resource files
 5. **Auto-migrate** -- If `~/.claude-mem/` (original claude-mem) or `~/claude-mem-lite/` (pre-v0.5 unhidden) exists, migrates database and runtime files to `~/.claude-mem-lite/`, preserving the original untouched
@@ -204,10 +210,13 @@ rm -rf ~/claude-mem-lite/   # pre-v0.5 unhidden (if not auto-moved)
 | `mem_timeline` | Browse observations chronologically around an anchor point. |
 | `mem_get` | Retrieve full details for specific observation IDs (includes importance and related_ids). |
 | `mem_save` | Manually save a memory/observation. |
+| `mem_update` | Update an existing observation in-place. Preserves original ID and references. |
 | `mem_stats` | View statistics: counts, type distribution, top projects, daily activity. |
 | `mem_delete` | Delete observations by ID with preview/confirm workflow. FTS5 cleanup is automatic. |
 | `mem_compress` | Compress old low-value observations into weekly summaries to reduce noise. |
 | `mem_maintain` | Memory maintenance: scan for duplicates/stale/broken items, then execute cleanup/dedup/rebuild_vectors operations. |
+| `mem_export` | Export observations as JSON or JSONL for backup or migration. Filters by project, type, date range. |
+| `mem_fts_check` | Check FTS5 index integrity or rebuild indexes. Use when search results seem wrong or after DB recovery. |
 | `mem_registry` | Manage resource registry: search for skills/agents by need, list resources, view stats, import/remove tools, reindex. |
 ### Skill Commands (in Claude Code chat)
@@ -238,7 +247,8 @@ Five core tables with FTS5 virtual tables for search:
 id, memory_session_id, project, type, title, subtitle,
 text, narrative, concepts, facts, files_read, files_modified,
 importance, related_ids, created_at, created_at_epoch,
-lesson_learned, minhash_sig, access_count, compressed_into, search_aliases
+lesson_learned, minhash_sig, access_count, compressed_into, search_aliases,
+branch, superseded_at, superseded_by, last_accessed_at
 ```
 **session_summaries** -- LLM-generated session summaries
@@ -265,6 +275,11 @@ project, type, session_id, working_on, completed, unfinished,
 key_files, key_decisions, match_keywords, created_at_epoch
 ```
+**observation_files** -- Normalized file membership for efficient file-based recall
+```
+obs_id, filename
+```
 **observation_vectors** -- TF-IDF vector embeddings for hybrid search
 ```
 observation_id, vector (BLOB Float32Array), vocab_version, created_at_epoch
@@ -422,7 +437,16 @@ claude-mem-lite/
   tool-schemas.mjs     # Shared Zod schemas for MCP tool validation
   tfidf.mjs            # TF-IDF vector engine: tokenization, vocabulary building, vector computation, cosine similarity, RRF merge
   tier.mjs             # Temporal tier system: activity-based time window classification
-  utils.mjs            # Shared utilities: FTS5 query building, BM25 weight constants, MinHash dedup, secret scrubbing, CJK synonym extraction
+  utils.mjs            # Re-export hub: backward-compatible surface for all utility modules
+  nlp.mjs              # FTS5 query building: synonym expansion, CJK bigrams, sanitization
+  scoring-sql.mjs      # BM25 weight constants and type-differentiated decay half-lives
+  stop-words.mjs       # Shared base stop-word set for all NLP/search modules
+  synonyms.mjs         # Unified synonym source: SYNONYM_MAP (bidirectional) + DISPATCH_SYNONYMS
+  project-utils.mjs    # Shared project name resolution with in-process cache
+  secret-scrub.mjs     # API key, token, PEM, and credential pattern redaction
+  format-utils.mjs     # String formatting: truncate, typeIcon, date/time/week formatting
+  hash-utils.mjs       # MinHash signatures, Jaccard similarity for dedup
+  bash-utils.mjs       # Bash output significance detection: errors, tests, builds, deploys
   # Resource registry
   registry.mjs         # Resource registry DB: schema, CRUD, FTS5, invocation tracking
   registry-retriever.mjs # FTS5 retrieval with synonym expansion and composite scoring

package/README.zh-CN.md CHANGED Viewed

@@ -144,7 +144,7 @@ node install.mjs install
 ### 安装过程
 1. **安装依赖** -- `npm install --omit=dev`（编译原生 `better-sqlite3`）
-2. **注册 MCP 服务器** -- `mem` 服务器，包含 7 个工具（search、timeline、get、save、stats、delete、compress）
+2. **注册 MCP 服务器** -- `mem` 服务器，包含 12 个工具（search、timeline、get、save、update、stats、delete、compress、maintain、registry、export、fts_check）
 3. **配置钩子** -- `PostToolUse`、`PreToolUse`、`SessionStart`、`Stop`、`UserPromptSubmit` 生命周期钩子
 4. **创建数据目录** -- `~/.claude-mem-lite/`（隐藏目录），存放数据库、运行时和托管资源文件
 5. **自动迁移** -- 自动检测 `~/.claude-mem/`（原版 claude-mem）或 `~/claude-mem-lite/`（v0.5 前的非隐藏目录），将数据库和运行时文件迁移到 `~/.claude-mem-lite/`，原目录保持不变
@@ -200,9 +200,14 @@ rm -rf ~/claude-mem-lite/   # v0.5 前的非隐藏目录（如未自动迁移）
 | `mem_timeline` | 围绕锚点按时间顺序浏览观察。 |
 | `mem_get` | 获取指定观察 ID 的完整详情（包含重要度和关联 ID）。 |
 | `mem_save` | 手动保存记忆/观察。 |
+| `mem_update` | 原地更新已有观察，保留原始 ID 和引用关系。 |
 | `mem_stats` | 查看统计：计数、类型分布、热门项目、每日活动。 |
 | `mem_delete` | 按 ID 删除观察，支持预览/确认工作流。FTS5 自动清理。 |
 | `mem_compress` | 压缩旧的低价值观察为每周摘要，减少噪声。 |
+| `mem_maintain` | 记忆维护：扫描重复/过期/损坏条目，执行清理/去重/向量重建操作。 |
+| `mem_export` | 导出观察为 JSON 或 JSONL 格式，支持按项目、类型、日期范围过滤。 |
+| `mem_fts_check` | 检查 FTS5 索引完整性或重建索引。搜索结果异常或数据库恢复后使用。 |
+| `mem_registry` | 管理资源注册表：按需搜索技能/代理、列表、统计、导入/移除、重索引。 |
 ### 技能命令（在 Claude Code 聊天中使用）
@@ -441,7 +446,16 @@ claude-mem-lite/
   hook-semaphore.mjs   # LLM 并发控制：基于文件的信号量
   schema.mjs           # 数据库 schema：表、迁移、FTS5 的单一事实来源
   tool-schemas.mjs     # 共享 Zod schema，用于 MCP 工具校验
-  utils.mjs            # 共享工具：FTS5 查询构建、MinHash 去重、秘密擦除
+  utils.mjs            # 重导出中心：所有工具模块的向后兼容入口
+  nlp.mjs              # FTS5 查询构建：同义词扩展、CJK 二元组、查询清洗
+  scoring-sql.mjs      # BM25 权重常量和类型差异化衰减半衰期
+  stop-words.mjs       # 共享基础停用词集
+  synonyms.mjs         # 统一同义词源：SYNONYM_MAP（双向）+ DISPATCH_SYNONYMS
+  project-utils.mjs    # 共享项目名解析（含进程内缓存）
+  secret-scrub.mjs     # API 密钥、令牌、PEM 证书等凭据模式擦除
+  format-utils.mjs     # 字符串格式化：截断、类型图标、日期/时间格式化
+  hash-utils.mjs       # MinHash 签名、Jaccard 相似度（去重用）
+  bash-utils.mjs       # Bash 输出显著性检测：错误、测试、构建、部署
   # 智能调度
   dispatch.mjs         # 三级调度编排：快速过滤、上下文信号、FTS5、Haiku
   dispatch-inject.mjs  # 注入模板渲染：skill/agent 推荐

package/bash-utils.mjs ADDED Viewed

@@ -0,0 +1,109 @@
+// claude-mem-lite: Bash command analysis and file path extraction
+// Extracted from utils.mjs for focused responsibility
+import { basename } from 'path';
+/**
+ * Detect significance signals in a Bash command and its response.
+ * Checks for errors, test runs, builds, git operations, and deployments.
+ *
+ * Matching details:
+ * - `input.command` is lower-cased first; a missing command is treated as ''.
+ * - isError needs an error keyword in `response` AND `response.length > 15`, and is
+ *   forced to false whenever the command contains a read/search tool name as a
+ *   whole word (grep, rg, cat, head, tail, find, ...).
+ * - isTest, isBuild, isGit and isDeploy look only at the command text, never at
+ *   the response.
+ * - isSignificant is the logical OR of the five individual flags.
+ *
+ * NOTE(review): the search-tool check scans the whole command, so a word such as
+ * "find", "file", "type" or "more" inside an argument or a quoted message also
+ * suppresses isError — confirm this is acceptable.
+ * NOTE(review): the `fail(ed|ure)?`, `exception`, `panic`, `traceback`, `errno`
+ * and `enoent` alternatives carry no word boundaries, so they also match inside
+ * longer words (e.g. "failover") — confirm intended.
+ * @param {object} input Tool input with command field
+ * @param {string} response Command output text
+ * @returns {{isError: boolean, isTest: boolean, isBuild: boolean, isGit: boolean, isDeploy: boolean, isSignificant: boolean}}
+ */
+export function detectBashSignificance(input, response) {
+  const cmd = (input.command || '').toLowerCase();
+  // Skip error keyword matching when the command is a read/search operation
+  // (grep output naturally contains matched keywords like "error")
+  const isSearchCmd = /\b(grep|rg|ag|ack|cat|head|tail|less|more|find|locate|wc|file|which|type)\b/i.test(cmd);
+  const isError = !isSearchCmd
+    && /\berror\b|\bERR!|fail(ed|ure)?|exception|panic|traceback|errno|enoent|command not found/i.test(response)
+    && response.length > 15;
+  // Match actual test runner invocations, not commands that merely reference "test" as a keyword
+  const isTest = /\b(npm\s+test|npm\s+run\s+test|yarn\s+test|pnpm\s+test|pnpm\s+run\s+test|bun\s+test|go\s+test|cargo\s+test)\b/i.test(cmd)
+    || /\b(jest|pytest|vitest|mocha|cypress|playwright)\b/i.test(cmd);
+  const isBuild = /\b(build|compile|tsc|webpack|vite|rollup|esbuild|make|cargo)\b/i.test(cmd);
+  const isGit = /\bgit\s+(commit|merge|rebase|cherry-pick|push)\b/i.test(cmd);
+  const isDeploy = /\b(deploy|docker|kubectl|terraform)\b/i.test(cmd);
+  return {
+    isError, isTest, isBuild, isGit, isDeploy,
+    isSignificant: isError || isTest || isBuild || isGit || isDeploy,
+  };
+}
+const ERROR_STOP_WORDS = new Set([ // lower-case words that extractErrorKeywords never emits as keywords
+  'error', 'failed', 'cannot', 'could', 'with', 'from', 'that', 'this',
+  'have', 'been', 'were', 'does', 'will', 'would', 'should', 'must',
+  'true', 'false', 'null', 'undefined', 'function', 'return', 'const',
+  'node', 'require', 'stack', 'trace',
+]);
+/**
+ * Extract discriminative keywords from a failed command and its error output.
+ * Filters out common stop words to produce useful FTS5 search terms.
+ *
+ * Keywords come from two places, in this order:
+ * 1. The command: split on whitespace, slashes, backslashes, `|`, `&` and `;`;
+ *    parts of 3+ characters that do not start with `-` are kept, and only the
+ *    first 3 of those are considered.
+ * 2. The response: the first 3 lines mentioning error/fail/exception/cannot/
+ *    "not found"/undefined/null; in each line every character outside
+ *    letters, digits, `_`, `.` and `-` becomes a space, and the first 5 tokens
+ *    that are 4+ characters long and not purely numeric are considered.
+ *
+ * Every candidate is lower-cased, dropped if it is in ERROR_STOP_WORDS, and
+ * de-duplicated through a Set; the first 6 survivors (insertion order) are returned.
+ * The per-source caps (3 and 5) are applied before stop-word filtering, so fewer
+ * keywords than the caps suggest may come out.
+ *
+ * NOTE(review): `cmd.split` and `response.split` are called without guards, so both
+ * arguments are presumably always strings — verify against callers.
+ * @param {string} cmd The command that was executed
+ * @param {string} response The error output text
+ * @returns {string[]|null} Array of 1-6 keywords or null if none found
+ */
+export function extractErrorKeywords(cmd, response) {
+  const words = new Set();
+  const cmdParts = cmd.split(/[\s/\\|&;]+/).filter(w => w.length > 2 && !/^-/.test(w));
+  for (const w of cmdParts.slice(0, 3)) {
+    const lw = w.toLowerCase();
+    if (!ERROR_STOP_WORDS.has(lw)) words.add(lw);
+  }
+  const errLines = response.split('\n').filter(l =>
+    /error|fail|exception|cannot|not found|undefined|null/i.test(l)
+  ).slice(0, 3);
+  for (const line of errLines) {
+    const tokens = line.replace(/[^a-zA-Z0-9_.-]/g, ' ').split(/\s+/)
+      .filter(w => w.length > 3 && !/^\d+$/.test(w));
+    for (const t of tokens.slice(0, 5)) {
+      const lt = t.toLowerCase();
+      if (!ERROR_STOP_WORDS.has(lt)) words.add(lt);
+    }
+  }
+  const result = [...words].slice(0, 6);
+  return result.length >= 1 ? result : null;
+}
+// ─── File Paths ──────────────────────────────────────────────────────────────
+/**
+ * Extract file paths from tool input (file_path, path, filePath, or command args).
+ * Deduplicates and excludes /dev/, /proc/, and /tmp/ paths.
+ *
+ * Behaviour in detail:
+ * - `input.file_path`, `input.path` and `input.filePath` are added as-is when truthy.
+ *   The /dev/, /proc/, /tmp/ exclusion below is NOT applied to these three fields.
+ * - From `input.command`, only absolute paths are picked up: a `/` at the start of
+ *   the command or right after whitespace, followed by word characters, `.`, `/`
+ *   or `-`, and ending on a word character. A path preceded by a quote or
+ *   containing spaces is therefore not matched in full.
+ * - Command-derived paths under /dev/, /proc/ or /tmp/ are skipped.
+ * - Command-derived paths with a single component are kept only if they end in an
+ *   extension, so a root-level extensionless name (e.g. "/Makefile") is skipped
+ *   together with slash commands like "/exit".
+ * - The result preserves first-seen order with duplicates removed.
+ *
+ * NOTE(review): `input` is dereferenced without a null check — presumably callers
+ * always pass an object; verify.
+ * @param {object} input Tool input object
+ * @returns {string[]} Unique array of file paths
+ */
+export function extractFilePaths(input) {
+  const paths = [];
+  if (input.file_path) paths.push(input.file_path);
+  if (input.path) paths.push(input.path);
+  if (input.filePath) paths.push(input.filePath);
+  if (input.command) {
+    // Match absolute paths; extension optional to support Makefile, Dockerfile etc.
+    const match = input.command.match(/(?:^|\s)(\/[\w./-]+\w)/g);
+    if (match) {
+      for (const m of match) {
+        const p = m.trim(); // with the g flag each entry is the whole match, including the leading whitespace
+        if (!p.startsWith('/dev/') && !p.startsWith('/proc/') && !p.startsWith('/tmp/')
+          // Skip single-component paths like /exit, /clear — likely slash commands, not files
+          && (p.indexOf('/', 1) !== -1 || /\.\w+$/.test(p))) {
+          paths.push(p);
+        }
+      }
+    }
+  }
+  return [...new Set(paths)];
+}
+// ─── Episode Logic ───────────────────────────────────────────────────────────
+/**
+ * Strip test/spec/e2e suffixes from a filename for sibling matching.
+ * Example: auth.test.ts → auth.ts, auth.spec.js → auth.js
+ *
+ * The directory part is discarded first (via `basename`), so the result is a bare
+ * file name, not a path. Matching is case-insensitive and only the first
+ * ".test.", ".spec." or ".e2e." segment is collapsed to a single "."; names
+ * without such a segment are returned unchanged.
+ * @param {string} filePath File path to strip
+ * @returns {string} Basename with test suffix removed
+ */
+export function stripTestSuffix(filePath) {
+  return basename(filePath).replace(/\.(test|spec|e2e)\./i, '.');
+}

package/format-utils.mjs ADDED Viewed

@@ -0,0 +1,71 @@
+// claude-mem-lite: String formatting and display utilities
+// Extracted from utils.mjs for focused responsibility
+/**
+ * Truncate a string to a maximum length, replacing newlines with spaces.
+ *
+ * Falsy input (null, undefined, '') yields ''. Each "\n" becomes one space
+ * ("\r" is left alone), then the string is trimmed. When the result is longer
+ * than `max`, the first `max - 1` UTF-16 code units are kept and a single
+ * U+2026 ellipsis is appended, so the output length equals `max`.
+ *
+ * NOTE(review): the cut is made on UTF-16 code units, so it can land in the middle
+ * of a surrogate pair; `max` is presumably always >= 1 — confirm against callers.
+ * @param {string} str Input string
+ * @param {number} [max=80] Maximum character length
+ * @returns {string} Truncated string with ellipsis if needed
+ */
+export function truncate(str, max = 80) {
+  if (!str) return '';
+  str = str.replace(/\n/g, ' ').trim();
+  return str.length > max ? str.slice(0, max - 1) + '\u2026' : str;
+}
+/**
+ * Map observation type to its display emoji icon.
+ *
+ * Known types and their icons: decision = yellow circle (U+1F7E1), bugfix = red
+ * circle (U+1F534), feature = green circle (U+1F7E2), refactor = blue circle
+ * (U+1F535), discovery = magnifying glass (U+1F50D), change = memo (U+1F4DD).
+ * Any other value falls back to the white circle (U+26AA).
+ *
+ * NOTE(review): the lookup uses a plain object literal, so a type equal to an
+ * inherited property name (e.g. "constructor", "toString") returns that inherited
+ * member instead of the fallback icon — confirm types are always validated upstream.
+ * @param {string} type Observation type (decision, bugfix, feature, etc.)
+ * @returns {string} Emoji icon for the type
+ */
+export function typeIcon(type) {
+  const icons = { decision: '\uD83D\uDFE1', bugfix: '\uD83D\uDD34', feature: '\uD83D\uDFE2', refactor: '\uD83D\uDD35', discovery: '\uD83D\uDD0D', change: '\uD83D\uDCDD' };
+  return icons[type] || '\u26AA';
+}
+// ─── Date Formatting ─────────────────────────────────────────────────────────
+const MONTHS = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']; // indexed by getUTCMonth() (0 = January)
+/**
+ * Format an ISO date string as "Mon DD HH:MM" for compact display.
+ *
+ * All components are read with the UTC getters, so the output is in UTC
+ * regardless of the local time zone. Hours and minutes are zero-padded to two
+ * digits; the day of month is not padded.
+ *
+ * NOTE(review): only falsy input returns ''. A non-empty string that `Date` cannot
+ * parse produces "undefined NaN NaN:NaN" rather than an empty string — confirm
+ * callers never pass malformed timestamps.
+ * @param {string} iso ISO 8601 date string
+ * @returns {string} Formatted date or empty string
+ */
+export function fmtDate(iso) {
+  if (!iso) return '';
+  const d = new Date(iso);
+  const mon = MONTHS[d.getUTCMonth()];
+  const day = d.getUTCDate();
+  const h = String(d.getUTCHours()).padStart(2, '0');
+  const m = String(d.getUTCMinutes()).padStart(2, '0');
+  return `${mon} ${day} ${h}:${m}`;
+}
+/**
+ * Format an ISO date string as "HH:MM" for time-only display.
+ *
+ * Hours and minutes are read with the UTC getters and zero-padded to two digits,
+ * so the output is in UTC regardless of the local time zone.
+ *
+ * NOTE(review): only falsy input returns ''. A non-empty string that `Date` cannot
+ * parse produces "NaN:NaN" rather than an empty string — confirm callers never
+ * pass malformed timestamps.
+ * @param {string} iso ISO 8601 date string
+ * @returns {string} Formatted time or empty string
+ */
+export function fmtTime(iso) {
+  if (!iso) return '';
+  const d = new Date(iso);
+  return `${String(d.getUTCHours()).padStart(2, '0')}:${String(d.getUTCMinutes()).padStart(2, '0')}`;
+}
+// ─── ISO Week ────────────────────────────────────────────────────────────────
+/**
+ * Convert an epoch timestamp to an ISO week key string (e.g. "2026-W06").
+ *
+ * The computation is done entirely in UTC: the timestamp is reduced to its UTC
+ * calendar day, moved to the Thursday of the same Monday-to-Sunday week, and the
+ * year of that Thursday becomes the year in the key. The week number is that
+ * Thursday's 1-based day of year divided by 7, rounded up, and zero-padded to two
+ * digits. Dates near January 1 can therefore fall in the previous or next year's
+ * key.
+ * @param {number} epochMs Epoch timestamp in milliseconds
+ * @returns {string} ISO week key in format "YYYY-Wnn"
+ */
+export function isoWeekKey(epochMs) {
+  const d = new Date(epochMs);
+  const tmp = new Date(Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), d.getUTCDate())); // UTC midnight of the same calendar day
+  tmp.setUTCDate(tmp.getUTCDate() + 4 - (tmp.getUTCDay() || 7)); // shift to Thursday of this week; Sunday (0) is treated as day 7
+  const yearStart = new Date(Date.UTC(tmp.getUTCFullYear(), 0, 1));
+  const weekNum = Math.ceil(((tmp - yearStart) / 86400000 + 1) / 7); // 86400000 ms = one day
+  const isoYear = tmp.getUTCFullYear();
+  return `${isoYear}-W${String(weekNum).padStart(2, '0')}`;
+}

package/hash-utils.mjs ADDED Viewed

@@ -0,0 +1,77 @@
+// claude-mem-lite: Hashing and similarity utilities
+// Extracted from utils.mjs for focused responsibility
+/**
+ * Compute word-level Jaccard similarity between two strings.
+ *
+ * Each string is lower-cased and split on runs of whitespace; trailing `,;:!?`
+ * characters are removed from every token; the tokens of each string form a set.
+ * The score is |A ∩ B| / |A ∪ B|. If either argument is falsy the score is 0.
+ *
+ * NOTE(review): leading or trailing whitespace makes `split` emit an empty-string
+ * token, which then counts as a word shared by both inputs — confirm inputs are
+ * trimmed by callers.
+ * NOTE(review): only the trailing `,;:!?` characters are stripped here, whereas
+ * computeMinHash replaces every non [a-z0-9] character, so the two normalizations
+ * are similar but not identical.
+ * @param {string} a First string
+ * @param {string} b Second string
+ * @returns {number} Similarity score between 0 and 1
+ */
+export function jaccardSimilarity(a, b) {
+  if (!a || !b) return 0;
+  // Strip trailing punctuation from tokens to match MinHash normalization
+  // (prevents "server.rs," ≠ "server.rs" dedup failures)
+  const norm = s => s.toLowerCase().split(/\s+/).map(t => t.replace(/[,;:!?]+$/, ''));
+  const setA = new Set(norm(a));
+  const setB = new Set(norm(b));
+  let intersection = 0;
+  for (const w of setA) { if (setB.has(w)) intersection++; }
+  const union = setA.size + setB.size - intersection;
+  return union === 0 ? 0 : intersection / union;
+}
+// ─── MinHash Signatures ──────────────────────────────────────────────────
+// FNV-1a hash: fast, non-cryptographic, ~10x faster than SHA-256 for MinHash
+function fnv1a(str) {
+  let hash = 0x811c9dc5; // FNV offset basis (32-bit)
+  for (let i = 0; i < str.length; i++) {
+    hash ^= str.charCodeAt(i); // mixes in one UTF-16 code unit per step (not UTF-8 bytes)
+    hash = Math.imul(hash, 0x01000193); // FNV prime
+    hash >>>= 0; // Keep as uint32
+  }
+  return hash; // unsigned 32-bit integer; the empty string returns the offset basis
+}
+/**
+ * Compute a MinHash signature for approximate set similarity.
+ * Returns null for texts with fewer than 3 tokens.
+ *
+ * Tokenization: the text is lower-cased, every character outside a-z, 0-9 and
+ * whitespace is replaced by a space, the result is split on whitespace, and only
+ * tokens of 3+ characters are kept. The "fewer than 3 tokens" rule applies to this
+ * filtered list, so text made only of short words or non-ASCII characters
+ * returns null. Non-string or empty input also returns null.
+ *
+ * Signature: for each of the `numHashes` slots i, the minimum of
+ * fnv1a(i + "-" + token) over all tokens is taken; each minimum is written as 8
+ * zero-padded hex digits, giving a string of numHashes * 8 characters.
+ * @param {string} text Input text to hash
+ * @param {number} [numHashes=64] Number of hash functions
+ * @returns {string|null} Hex-encoded MinHash signature or null
+ */
+export function computeMinHash(text, numHashes = 64) {
+  if (!text || typeof text !== 'string') return null;
+  const tokens = text.toLowerCase().replace(/[^a-z0-9\s]/g, ' ').split(/\s+/)
+    .filter(t => t.length > 2);
+  // Require at least 3 tokens for meaningful signature (avoids high collision on short texts)
+  if (tokens.length < 3) return null;
+  const mins = new Array(numHashes).fill(0xFFFFFFFF); // start every slot at the uint32 maximum
+  for (const token of tokens) {
+    for (let i = 0; i < numHashes; i++) {
+      const val = fnv1a(`${i}-${token}`); // the slot index in the key makes each slot a different hash of the same token
+      if (val < mins[i]) mins[i] = val;
+    }
+  }
+  return mins.map(v => v.toString(16).padStart(8, '0')).join('');
+}
+/**
+ * Estimate Jaccard similarity from two MinHash signatures.
+ *
+ * Signatures are read as consecutive 8-character hex slots. The estimate is the
+ * fraction of slot positions whose 8 characters are identical in both signatures.
+ * Returns 0 when either signature is falsy or when their lengths differ.
+ *
+ * NOTE(review): the slot count is `length / 8` with no check that the length is a
+ * multiple of 8; for a malformed signature the count is fractional and the result
+ * can exceed 1 — presumably inputs always come from computeMinHash; verify.
+ * @param {string} sig1 First hex-encoded MinHash signature
+ * @param {string} sig2 Second hex-encoded MinHash signature
+ * @returns {number} Estimated Jaccard similarity between 0 and 1
+ */
+export function estimateJaccardFromMinHash(sig1, sig2) {
+  if (!sig1 || !sig2) return 0;
+  if (sig1.length !== sig2.length) return 0;
+  const numHashes = sig1.length / 8; // 8 hex characters per 32-bit slot
+  if (numHashes === 0) return 0;
+  let matches = 0;
+  for (let i = 0; i < numHashes; i++) {
+    const offset = i * 8;
+    if (sig1.slice(offset, offset + 8) === sig2.slice(offset, offset + 8)) matches++;
+  }
+  return matches / numHashes;
+}

package/hook-llm.mjs CHANGED Viewed

@@ -27,6 +27,11 @@ function buildFtsTextField(obs) {
   return { conceptsText, factsText, textField: [conceptsText, factsText, aliasesText, bigramText].filter(Boolean).join(' ') };
 }
+/**
+ * Save an observation to the database with three-tier dedup.
+ * @returns {number|null} The saved observation ID, or null if deduped.
+ *   Throws on DB error (callers should catch if needed).
+ */
 export function saveObservation(obs, projectOverride, sessionIdOverride, externalDb) {
   const db = externalDb || openDb();
   if (!db) return null;
@@ -41,7 +46,7 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
       VALUES (?, ?, ?, ?, ?, 'active')
     `).run(sessionId, sessionId, project, now.toISOString(), now.getTime());
-    // Three-tier dedup
+    // Three-tier dedup — returns null (not throw) for dedup hits
     // Tier 1 (fast): 5-min Jaccard on titles
     const fiveMinAgo = now.getTime() - DEDUP_WINDOW_MS;
     const recent = db.prepare(`
@@ -51,7 +56,7 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
     `).all(project, fiveMinAgo);
     if (obs.title && recent.some(r => jaccardSimilarity(r.title, obs.title) > 0.7)) {
-      return null;
+      return null; // dedup: Jaccard title match
     }
     // Tier 1.5: Extended title dedup for low-signal degraded titles
@@ -68,7 +73,7 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
         WHERE project = ? AND title = ? AND created_at_epoch > ? AND created_at_epoch <= ?
         LIMIT 1
       `).get(project, obs.title, sevenDaysAgo, fiveMinAgo);
-      if (exactDup) return null;
+      if (exactDup) return null; // dedup: exact title match
       // Phase 2: Jaccard similarity for near-duplicates (3-day window)
       const extRecent = db.prepare(`
         SELECT title FROM observations
@@ -76,7 +81,7 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
         ORDER BY created_at_epoch DESC LIMIT 60
       `).all(project, threeDaysAgo, fiveMinAgo);
       if (extRecent.some(r => jaccardSimilarity(r.title, obs.title) > 0.85)) {
-        return null;
+        return null; // dedup: low-signal Jaccard match
       }
     }
@@ -91,44 +96,57 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
       `).all(project, sevenDaysAgo);
       if (recentSigs.some(r => estimateJaccardFromMinHash(minhashSig, r.minhash_sig) > 0.8)) {
-        return null;
+        return null; // dedup: MinHash similarity match
       }
     }
     const { conceptsText, factsText, textField } = buildFtsTextField(obs);
-    const result = db.prepare(`
-      INSERT INTO observations (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts, files_read, files_modified, importance, minhash_sig, lesson_learned, search_aliases, branch, created_at, created_at_epoch)
-      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-    `).run(
-      sessionId, project,
-      textField, obs.type, obs.title, obs.subtitle || '',
-      obs.narrative || '',
-      conceptsText,
-      factsText,
-      JSON.stringify(obs.filesRead || []),
-      JSON.stringify(obs.files || []),
-      obs.importance ?? 1,
-      minhashSig,
-      obs.lessonLearned || null,
-      obs.searchAliases || null,
-      getCurrentBranch(),
-      now.toISOString(), now.getTime()
-    );
-    const savedId = Number(result.lastInsertRowid);
-    // Write TF-IDF vector (non-critical)
-    try {
-      const vocab = getVocabulary(db);
-      if (vocab) {
-        const vecText = [obs.title || '', obs.narrative || '', (Array.isArray(obs.concepts) ? obs.concepts.join(' ') : '')].filter(Boolean).join(' ');
-        const vec = computeVector(vecText, vocab);
-        if (vec) {
-          db.prepare('INSERT OR REPLACE INTO observation_vectors (observation_id, vector, vocab_version, created_at_epoch) VALUES (?, ?, ?, ?)')
-            .run(savedId, Buffer.from(vec.buffer), vocab.version, Date.now());
+    // Atomic: observation INSERT + observation_files + vector in one transaction
+    const savedId = db.transaction(() => {
+      const result = db.prepare(`
+        INSERT INTO observations (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts, files_read, files_modified, importance, minhash_sig, lesson_learned, search_aliases, branch, created_at, created_at_epoch)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+      `).run(
+        sessionId, project,
+        textField, obs.type, obs.title, obs.subtitle || '',
+        obs.narrative || '',
+        conceptsText,
+        factsText,
+        JSON.stringify(obs.filesRead || []),
+        JSON.stringify(obs.files || []),
+        obs.importance ?? 1,
+        minhashSig,
+        obs.lessonLearned || null,
+        obs.searchAliases || null,
+        getCurrentBranch(),
+        now.toISOString(), now.getTime()
+      );
+      const id = Number(result.lastInsertRowid);
+      // Populate observation_files junction table
+      if (id && obs.files && obs.files.length > 0) {
+        const insertFile = db.prepare('INSERT OR IGNORE INTO observation_files (obs_id, filename) VALUES (?, ?)');
+        for (const f of obs.files) {
+          if (typeof f === 'string' && f.length > 0) insertFile.run(id, f);
         }
       }
-    } catch (e) { debugCatch(e, 'saveObservation-vector'); }
+      // Write TF-IDF vector (non-critical — catch inside transaction to avoid rollback)
+      try {
+        const vocab = getVocabulary(db);
+        if (vocab) {
+          const vecText = [obs.title || '', obs.narrative || '', (Array.isArray(obs.concepts) ? obs.concepts.join(' ') : '')].filter(Boolean).join(' ');
+          const vec = computeVector(vecText, vocab);
+          if (vec) {
+            db.prepare('INSERT OR REPLACE INTO observation_vectors (observation_id, vector, vocab_version, created_at_epoch) VALUES (?, ?, ?, ?)')
+              .run(id, Buffer.from(vec.buffer), vocab.version, Date.now());
+          }
+        }
+      } catch (e) { debugCatch(e, 'saveObservation-vector'); }
+      return id;
+    })();
     return savedId;
   } finally {

package/hook-memory.mjs CHANGED Viewed

@@ -132,22 +132,20 @@ export function recallForFile(db, filePath, project) {
     const cutoff = Date.now() - FILE_RECALL_LOOKBACK_MS;
     // Escape SQL LIKE wildcards in filename to prevent injection
     const escaped = basename.replace(/%/g, '\\%').replace(/_/g, '\\_');
-    // Match both full paths (/path/to/file.mjs) and basename-only entries ("file.mjs")
-    // Two patterns avoid false positives: %/file.mjs"% won't match /webapp.mjs
-    const pathPattern = `%/${escaped}"%`;
-    const namePattern = `%"${escaped}"%`;
+    const likePattern = `%${escaped}`;
     const rows = db.prepare(`
-      SELECT id, type, title, importance, lesson_learned
-      FROM observations
-      WHERE project = ?
-        AND importance >= 2
-        AND COALESCE(compressed_into, 0) = 0
-        AND superseded_at IS NULL
-        AND created_at_epoch > ?
-        AND (files_modified LIKE ? ESCAPE '\\' OR files_modified LIKE ? ESCAPE '\\')
-      ORDER BY created_at_epoch DESC
+      SELECT DISTINCT o.id, o.type, o.title, o.importance, o.lesson_learned
+      FROM observations o
+      JOIN observation_files of2 ON of2.obs_id = o.id
+      WHERE o.project = ?
+        AND o.importance >= 2
+        AND COALESCE(o.compressed_into, 0) = 0
+        AND o.superseded_at IS NULL
+        AND o.created_at_epoch > ?
+        AND (of2.filename = ? OR of2.filename LIKE ? ESCAPE '\\')
+      ORDER BY o.created_at_epoch DESC
       LIMIT ?
-    `).all(project, cutoff, pathPattern, namePattern, MAX_FILE_RECALL);
+    `).all(project, cutoff, filePath, likePattern, MAX_FILE_RECALL);
     const now = Date.now();
     const updateStmt = db.prepare('UPDATE observations SET access_count = COALESCE(access_count, 0) + 1, last_accessed_at = ? WHERE id = ?');
     for (const r of rows) updateStmt.run(now, r.id);

package/hook-update.mjs CHANGED Viewed

@@ -200,6 +200,8 @@ const SOURCE_FILES = [
   'registry.mjs', 'registry-scanner.mjs', 'registry-indexer.mjs',
   'registry-retriever.mjs', 'resource-discovery.mjs',
   'install.mjs', 'install-metadata.mjs', 'mem-cli.mjs', 'tier.mjs', 'tfidf.mjs',
+  'nlp.mjs', 'synonyms.mjs', 'scoring-sql.mjs', 'stop-words.mjs', 'project-utils.mjs',
+  'secret-scrub.mjs', 'format-utils.mjs', 'hash-utils.mjs', 'bash-utils.mjs',
 ];
 const SWITCHABLE_PATHS = [...SOURCE_FILES, 'scripts', 'registry', 'node_modules'];

package/install.mjs CHANGED Viewed

@@ -206,6 +206,8 @@ async function install() {
     'registry.mjs', 'registry-scanner.mjs', 'registry-indexer.mjs',
     'registry-retriever.mjs', 'resource-discovery.mjs',
     'install-metadata.mjs', 'mem-cli.mjs', 'tier.mjs', 'tfidf.mjs',
+    'nlp.mjs', 'synonyms.mjs', 'scoring-sql.mjs', 'stop-words.mjs', 'project-utils.mjs',
+    'secret-scrub.mjs', 'format-utils.mjs', 'hash-utils.mjs', 'bash-utils.mjs',
   ];
   if (IS_DEV) {