npm - claude-mem-lite - Versions diffs - 2.35.0 → 2.37.0 - Mend

claude-mem-lite 2.35.0 → 2.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/.claude-plugin/marketplace.json +1 -1
package/.claude-plugin/plugin.json +1 -1
package/hook-llm.mjs +68 -1
package/hook-memory.mjs +23 -7
package/hook.mjs +18 -0
package/lib/citation-tracker.mjs +82 -0
package/lib/low-signal-patterns.mjs +79 -0
package/package.json +2 -1
package/schema.mjs +8 -1
package/scoring-sql.mjs +38 -0
package/scripts/user-prompt-search.js +22 -2
package/source-files.mjs +1 -0
package/utils.mjs +1 -1

package/.claude-plugin/marketplace.json CHANGED

@@ -10,7 +10,7 @@
   "plugins": [
     {
       "name": "claude-mem-lite",
-      "version": "2.35.0",
+      "version": "2.37.0",
       "source": "./",
       "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall"
     }

package/.claude-plugin/plugin.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "claude-mem-lite",
-  "version": "2.35.0",
+  "version": "2.37.0",
   "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall",
   "author": {
     "name": "sdsrss"

package/hook-llm.mjs CHANGED

@@ -16,6 +16,7 @@ import {
   sessionFile, getSessionId, openDb, callLLM, sleep,
 } from './hook-shared.mjs';
 import { EVENT_TYPES, saveEvent } from './lib/activity.mjs';
+import { isNoiseObservation } from './lib/low-signal-patterns.mjs';
 // T9: memdir-incompatible types live in the `events` table, not `observations`.
 // Set lookup is O(1) — authoritative source is lib/activity.mjs::EVENT_TYPES.
@@ -69,6 +70,14 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
       VALUES (?, ?, ?, ?, ?, 'active')
     `).run(sessionId, sessionId, project, now.toISOString(), now.getTime());
+    // P0: write-side noise block — LOW_SIGNAL title with no recoverable signal
+    // (no lesson, importance<2, empty facts, thin narrative) is dropped before
+    // dedup/MinHash/vector work. Opt-out: CLAUDE_MEM_KEEP_LOW_SIGNAL=1.
+    if (isNoiseObservation(obs)) {
+      debugLog('saveObservation', `dropped noise: ${truncate(obs.title || '', 60)}`);
+      return null;
+    }
     // Three-tier dedup — returns null (not throw) for dedup hits
     // Tier 1 (fast): 5-min Jaccard on titles
     const fiveMinAgo = now.getTime() - DEDUP_WINDOW_MS;
@@ -464,6 +473,38 @@ export function buildImmediateObservation(episode) {
   };
 }
+// ─── Lesson retry prompt (P3) ───────────────────────────────────────────────
+/**
+ * Build a lesson-focused retry prompt after Haiku's first pass for
+ * bugfix/decision returned null/empty/'none'. Narrow ask: one non-obvious
+ * insight a future session would benefit from — either root cause (bugfix)
+ * or tradeoff (decision).
+ *
+ * @param {object} episode
+ * @param {object} firstPass — parsed first-pass response (title, type, narrative)
+ * @returns {string} prompt
+ */
+export function buildLessonRetryPrompt(episode, firstPass) {
+  const actionList = episode.entries.map((e, i) =>
+    `${i + 1}. [${e.tool}] ${e.desc}${e.isError ? ' (ERROR)' : ''}`
+  ).join('\n');
+  const typeHint = firstPass.type === 'bugfix'
+    ? 'For this bugfix: what was the root cause + how to spot it next time? Example: "FTS5 trigger fires on any UPDATE — wrap access_count writes in try/catch."'
+    : 'For this decision: what tradeoff was made + why? Example: "Chose single-source module over schema column because 1 drift point, not 4."';
+  return `A ${firstPass.type} episode just completed. First-pass title: "${firstPass.title || 'untitled'}".
+Actions:
+${actionList}
+${typeHint}
+If the work was purely mechanical with no insight worth remembering, reply {"lesson":"none"}.
+Otherwise reply in 12-280 chars.
+Reply ONLY valid JSON, no markdown fences: {"lesson":"..."}`;
+}
 // ─── Background: LLM Episode Extraction (Tier 2 F) ──────────────────────────
 export async function handleLLMEpisode() {
@@ -506,6 +547,7 @@ Action: ${e.desc}
 Error: ${e.isError ? 'yes' : 'no'}
 JSON: {"type":"decision|bugfix|feature|refactor|discovery|change","title":"concise ≤80 char description","narrative":"what changed, why, and outcome (2-3 sentences)","concepts":["kw1","kw2"],"facts":["fact1","fact2"],"importance":1,"lesson_learned":"non-obvious insight or 'none' if routine","search_aliases":["alt query 1","alt query 2"]}
+type: pick by strongest signal. decision = explicit tradeoff / "chose X over Y because Z" / rejected an approach (e.g. "Rejected schema migration — single-source module + sync test instead"; "Heterogeneous hook events → heterogeneous context budgets"). bugfix = prior-failing path fixed with a named root cause. feature = new user-visible capability. refactor = behavior unchanged but structure improved. discovery = learned how a system works (read-heavy, no writes). change = routine edit with no new principle (default if unsure and nothing else fits).
 Facts: each MUST be (1) atomic—one claim, (2) self-contained—no pronouns, include file/function name, (3) specific—"refreshToken() in auth.ts:45 uses 1h TTL" not "handles tokens"
 importance: Be strict — default to 1. 0=pure browsing with zero learning value. 1=routine file edits, standard changes, normal workflow (MOST episodes). 2=notable ONLY if it reveals something non-obvious: error fix with discovered root cause, architectural decision with explicit tradeoff, config change with unexpected side effects. 3=critical: breaking change affecting users, security vulnerability fix, data migration. Ask yourself: "would a future session benefit from knowing this?" — if not, it's importance=1.
 lesson_learned: REQUIRED field. State what was learned that isn't obvious from reading the code. Examples: "FTS5 porter stemmer doesn't tokenize CJK — need bigram workaround", "vitest --reporter=verbose hangs on large test suites, use default reporter". If purely routine with nothing learned, write "none" (not null).
@@ -523,6 +565,7 @@ Actions (${episode.entries.length} total):
 ${actionList}
 JSON: {"type":"decision|bugfix|feature|refactor|discovery|change","title":"coherent ≤80 char summary","narrative":"what was done, why, and outcome (3-5 sentences)","concepts":["keyword1","keyword2"],"facts":["specific fact 1","specific fact 2"],"importance":1,"lesson_learned":"non-obvious insight or 'none' if routine","search_aliases":["alt query 1","alt query 2"]}
+type: pick by strongest signal. decision = explicit tradeoff / "chose X over Y because Z" / rejected an approach (e.g. "Rejected schema migration — single-source module + sync test instead"; "Heterogeneous hook events → heterogeneous context budgets"). bugfix = prior-failing path fixed with a named root cause. feature = new user-visible capability. refactor = behavior unchanged but structure improved. discovery = learned how a system works (read-heavy, no writes). change = routine edit with no new principle (default if unsure and nothing else fits).
 Facts: each MUST be (1) atomic—one claim, (2) self-contained—no pronouns, include file/function name, (3) specific—"refreshToken() in auth.ts:45 uses 1h TTL" not "handles tokens"
 importance: Be strict — default to 1. 0=pure browsing with zero learning value. 1=routine file edits, standard changes, normal workflow (MOST episodes). 2=notable ONLY if it reveals something non-obvious: error fix with discovered root cause, architectural decision with explicit tradeoff, config change with unexpected side effects. 3=critical: breaking change affecting users, security vulnerability fix, data migration. Ask yourself: "would a future session benefit from knowing this?" — if not, it's importance=1.
 lesson_learned: REQUIRED field. State what was learned that isn't obvious from reading the code. Examples: "FTS5 porter stemmer doesn't tokenize CJK — need bigram workaround", "vitest --reporter=verbose hangs on large test suites, use default reporter". If purely routine with nothing learned, write "none" (not null).
@@ -570,7 +613,31 @@ search_aliases: 2-6 alternative search terms someone might use to find this memo
       const rawLesson = typeof parsed.lesson_learned === 'string' ? parsed.lesson_learned.trim() : '';
       const lowSignalLesson = new Set(['none', '', 'n/a', 'null', 'todo', 'tbd', 'na', '-', 'nothing', 'nil']);
       const isLessonLowSignal = lowSignalLesson.has(rawLesson.toLowerCase()) || rawLesson.length < 12;
-      const lessonLearned = isLessonLowSignal ? null : rawLesson.slice(0, 500);
+      let lessonLearned = isLessonLowSignal ? null : rawLesson.slice(0, 500);
+      // P3: for bugfix/decision, retry once with a lesson-focused prompt.
+      // These types have the highest reuse value (~72.7% hit-rate vs change
+      // ~16.5%), and Haiku's first pass writes NULL ~70% of the time for
+      // curated observations. Retry budget: 1 extra callLLM per bugfix/decision
+      // episode. Opt-out: CLAUDE_MEM_NO_LESSON_RETRY=1.
+      if (isLessonLowSignal &&
+          (parsed.type === 'bugfix' || parsed.type === 'decision') &&
+          !process.env.CLAUDE_MEM_NO_LESSON_RETRY) {
+        try {
+          const retryPrompt = buildLessonRetryPrompt(episode, parsed);
+          const retryRaw = callLLM(retryPrompt, 10000);
+          if (retryRaw) {
+            const retry = parseJsonFromLLM(retryRaw);
+            const retryLesson = typeof retry?.lesson === 'string' ? retry.lesson.trim() : '';
+            const retryIsLow = lowSignalLesson.has(retryLesson.toLowerCase()) || retryLesson.length < 12;
+            if (!retryIsLow) {
+              lessonLearned = retryLesson.slice(0, 500);
+              debugLog('DEBUG', 'llm-episode', `lesson-retry: recovered ${retryLesson.length}-char lesson for ${parsed.type}`);
+            }
+          }
+        } catch (e) { debugCatch(e, 'lesson-retry'); }
+      }
       const searchAliases = Array.isArray(parsed.search_aliases)
         ? parsed.search_aliases.slice(0, 6).join(' ')
         : null;

package/hook-memory.mjs CHANGED

@@ -1,7 +1,7 @@
 // claude-mem-lite — Semantic Memory Injection
 // Search past observations for relevant memories to inject as context at user-prompt time.
-import { sanitizeFtsQuery, relaxFtsQueryToOr, debugCatch, OBS_BM25, notLowSignalTitleClause } from './utils.mjs';
+import { sanitizeFtsQuery, relaxFtsQueryToOr, debugCatch, OBS_BM25, notLowSignalTitleClause, noisePenaltyClause } from './utils.mjs';
 const MAX_MEMORY_INJECTIONS = 3;
 const MEMORY_LOOKBACK_MS = 60 * 86400000; // 60 days
@@ -42,9 +42,14 @@ export function searchRelevantMemories(db, userPrompt, project, excludeIds = [])
     // R1: notLowSignalTitleClause() excludes hook-llm fallback titles
     // ("Modified X", "Worked on X", "Reviewed N files:", raw error logs, etc.)
     // that almost never get referenced (3.3% access rate) but compete for BM25 rank.
+    // v26 P0: noise_penalty is multiplied AFTER sort-BM25 so the column used
+    // for ORDER BY stays the penalty-adjusted `relevance` applied downstream
+    // in JS (scored.sort). SELECT exposes both raw BM25 (for sort) and the
+    // penalty factor (for the final JS score).
     const selectStmt = db.prepare(`
       SELECT o.id, o.type, o.title, o.importance, o.lesson_learned, o.project,
-             ${OBS_BM25} as relevance
+             ${OBS_BM25} as relevance,
+             ${noisePenaltyClause('o')} as noise_penalty
       FROM observations_fts
       JOIN observations o ON o.id = observations_fts.rowid
       WHERE observations_fts MATCH ?
@@ -80,7 +85,8 @@ export function searchRelevantMemories(db, userPrompt, project, excludeIds = [])
     try {
       const crossStmt = db.prepare(`
         SELECT o.id, o.type, o.title, o.importance, o.lesson_learned, o.project,
-               ${OBS_BM25} as relevance
+               ${OBS_BM25} as relevance,
+               ${noisePenaltyClause('o')} as noise_penalty
         FROM observations_fts
         JOIN observations o ON o.id = observations_fts.rowid
         WHERE observations_fts MATCH ?
@@ -105,12 +111,14 @@ export function searchRelevantMemories(db, userPrompt, project, excludeIds = [])
     // Merge and score: same-project full weight, cross-project 0.7x
     // OR-fallback results get 0.4x penalty — they matched individual words, not the full intent
+    // v26 P0: noise_penalty (from SQL) shrinks high-inject/low-cite rows.
     const allRows = [...rows.map(r => ({ ...r, _or: usedOrFallback })), ...crossRows.map(r => ({ ...r, _or: crossUsedOr }))];
     const scored = allRows
       .filter(r => !excludeSet.has(r.id))
       .map(r => {
         const crossProjectPenalty = r.project === project ? 1.0 : 0.7;
         const orFallbackPenalty = r._or ? 0.4 : 1.0;
+        const noisePenalty = typeof r.noise_penalty === 'number' ? r.noise_penalty : 1.0;
         return {
           ...r,
           score: Math.abs(r.relevance)
@@ -118,7 +126,8 @@ export function searchRelevantMemories(db, userPrompt, project, excludeIds = [])
             * (r.lesson_learned ? 1.5 : 1.0)
             * (r.importance >= 2 ? 1.0 : 0.6)
             * crossProjectPenalty
-            * orFallbackPenalty,
+            * orFallbackPenalty
+            * noisePenalty,
         };
       })
       .sort((a, b) => b.score - a.score);
@@ -133,12 +142,19 @@ export function searchRelevantMemories(db, userPrompt, project, excludeIds = [])
     const aboveThreshold = scored.filter(r => r.score >= threshold);
     if (aboveThreshold.length === 0) return [];
-    // Update access_count for injected memories
+    // v26 P0: bump injection_count (NOT access_count) for injected rows.
+    // Before v26 this was bumping access_count, which conflated auto-injection
+    // with real cites/recalls/opens — polluting the noise-ratio signal the
+    // penalty clause now depends on. access_count is reserved for explicit
+    // access (cmdRecall/cmdGet/cmdTimeline/pre-tool-recall/citation-tracker).
+    // Per-row try/catch for FTS trigger safety (project_non_obvious.md).
     const result = aboveThreshold.slice(0, MAX_MEMORY_INJECTIONS);
     const now = Date.now();
-    const updateStmt = db.prepare('UPDATE observations SET access_count = COALESCE(access_count, 0) + 1, last_accessed_at = ? WHERE id = ?');
+    const bumpStmt = db.prepare(
+      'UPDATE observations SET injection_count = COALESCE(injection_count, 0) + 1, last_injected_at = ? WHERE id = ?'
+    );
     for (const r of result) {
-      updateStmt.run(now, r.id);
+      try { bumpStmt.run(now, r.id); } catch {}
     }
     return result;

package/hook.mjs CHANGED

@@ -42,6 +42,7 @@ import {
   spawnBackground,
 } from './hook-shared.mjs';
 import { handleLLMEpisode, handleLLMSummary, saveObservation, buildImmediateObservation } from './hook-llm.mjs';
+import { extractCitationsFromTranscript, bumpCitationAccess } from './lib/citation-tracker.mjs';
 import { searchRelevantMemories } from './hook-memory.mjs';
 import { buildAndSaveHandoff, detectContinuationIntent, renderHandoffInjection, extractUnfinishedSummary } from './hook-handoff.mjs';
 import { checkForUpdate } from './hook-update.mjs';
@@ -344,12 +345,16 @@ async function handleStop() {
   // This is the stable CC identifier — the mem plugin's file-based getSessionId()
   // collides across parallel sessions for the same project (see docs/bug.txt).
   let ccSessionId = null;
+  let transcriptPath = null;
   try {
     const raw = await readStdin();
     const hookData = JSON.parse(raw.text);
     if (typeof hookData?.session_id === 'string' && hookData.session_id.length > 0) {
       ccSessionId = hookData.session_id;
     }
+    if (typeof hookData?.transcript_path === 'string' && hookData.transcript_path.length > 0) {
+      transcriptPath = hookData.transcript_path;
+    }
   } catch { /* stdin unavailable — fall back to local session id */ }
   // Capture session info BEFORE cleanup. All DB lookups use the mem-internal id
@@ -448,6 +453,19 @@ async function handleStop() {
           }
         }
       } catch (e) { debugCatch(e, 'handleStop-fast-summary'); }
+      // P4: scan transcript for `#NN` observation citations in assistant text
+      // and bump access_count for matched rows. Closes the loop on the "cite #NN"
+      // contract — before P4 this was a one-way obligation with no feedback.
+      try {
+        if (transcriptPath && !process.env.CLAUDE_MEM_NO_CITATION_TRACK) {
+          const ids = extractCitationsFromTranscript(transcriptPath);
+          if (ids.size > 0) {
+            const n = bumpCitationAccess(db, ids, project);
+            debugLog('DEBUG', 'handleStop', `citations: ${ids.size} ids scanned, ${n} obs bumped`);
+          }
+        }
+      } catch (e) { debugCatch(e, 'handleStop-citation-track'); }
     } finally {
       db.close();
     }

package/lib/citation-tracker.mjs ADDED

@@ -0,0 +1,82 @@
+// Citation tracker (P4): scan Claude Code transcript for `#NN` observation-id
+// citations in assistant text, then bulk-increment access_count for matched rows.
+//
+// Closes the loop on the CLAUDE.md "cite #NN" contract — before P4, citations
+// were a one-way obligation with no measurable feedback. Now each honored
+// citation bumps access_count, making contract compliance observable via
+// mem_stats and preventing cited lessons from decaying into dead memory.
+//
+// FTS5 caveat (project_non_obvious.md): observations_au trigger fires on any
+// column UPDATE including access_count. Per-row UPDATEs wrapped in try-catch
+// to prevent SQLITE_CORRUPT_VTAB cascades from stopping the whole scan.
+import { readFileSync, existsSync } from 'fs';
+import { debugCatch } from '../utils.mjs';
+// `#123` / `#45678` at a word boundary — matches the CLAUDE.md cite pattern.
+// Bounded to 1-7 digits to skip URL fragments, markdown anchors, etc.
+const CITATION_RE = /#(\d{1,7})\b/g;
+/**
+ * Parse a Claude Code transcript .jsonl and extract unique observation IDs
+ * cited inside assistant text blocks.
+ *
+ * @param {string} transcriptPath Path to transcript file (.jsonl)
+ * @returns {Set<number>} unique IDs referenced as `#NN` in assistant text
+ */
+export function extractCitationsFromTranscript(transcriptPath) {
+  const ids = new Set();
+  if (!transcriptPath || !existsSync(transcriptPath)) return ids;
+  let raw;
+  try { raw = readFileSync(transcriptPath, 'utf8'); } catch { return ids; }
+  for (const line of raw.split('\n')) {
+    if (!line.trim()) continue;
+    let entry;
+    try { entry = JSON.parse(line); } catch { continue; }
+    // Claude Code transcript: one JSON per line with type='assistant' | 'user' | ...
+    if (entry.type !== 'assistant' || !entry.message) continue;
+    const content = entry.message.content;
+    if (!Array.isArray(content)) continue;
+    for (const block of content) {
+      if (block.type !== 'text' || typeof block.text !== 'string') continue;
+      CITATION_RE.lastIndex = 0;
+      let m;
+      while ((m = CITATION_RE.exec(block.text))) {
+        const id = Number(m[1]);
+        if (Number.isInteger(id) && id > 0 && id < 1e7) ids.add(id);
+      }
+    }
+  }
+  return ids;
+}
+/**
+ * Increment `access_count` (and `last_accessed_at`) for each cited observation
+ * that belongs to `project`. Returns the count of successful increments.
+ *
+ * Per-row UPDATE in try-catch so a single FTS-corrupted row can't abort the
+ * scan. Cross-project IDs are silently ignored by the WHERE clause.
+ *
+ * @param {import('better-sqlite3').Database} db
+ * @param {Iterable<number>} ids
+ * @param {string} project
+ * @returns {number} count of rows incremented
+ */
+export function bumpCitationAccess(db, ids, project) {
+  if (!db || !ids || !project) return 0;
+  const idList = Array.isArray(ids) ? ids : [...ids];
+  if (idList.length === 0) return 0;
+  const stmt = db.prepare(`
+    UPDATE observations SET access_count = access_count + 1, last_accessed_at = ?
+    WHERE id = ? AND project = ?
+  `);
+  const now = Date.now();
+  let n = 0;
+  for (const id of idList) {
+    try {
+      const result = stmt.run(now, id, project);
+      if (result.changes > 0) n++;
+    } catch (e) { debugCatch(e, `bumpCitationAccess-id-${id}`); }
+  }
+  return n;
+}

package/lib/low-signal-patterns.mjs CHANGED

@@ -58,3 +58,82 @@ export function buildNotLowSignalSql(alias = '') {
   const clauses = LOW_SIGNAL_PATTERNS.map(({ like }) => `${p}title NOT LIKE '${like}'`);
   return '(\n    ' + clauses.join('\n    AND ') + '\n  )';
 }
+// Cached singleton — isNoiseObservation is called once per observation insert.
+const _LOW_SIG_RE = buildLowSignalRegex();
+/**
+ * Detect narrative that is raw tool-output passthrough, not human/LLM prose (P2).
+ *
+ * `buildImmediateObservation` constructs narrative as
+ * `episode.entries.map(e => e.desc).join('; ')` where each desc is
+ * "cmd → stdout/stderr" from `scripts/post-tool-use.sh`. Such narratives
+ * have characteristic fingerprints (arrows, stack traces, diffs, test
+ * failure banners, absent sentence prose) that Haiku/user-written narratives
+ * don't. This check treats passthrough narratives as zero-signal for the
+ * purposes of isNoiseObservation.
+ *
+ * @param {string} narrative
+ * @returns {boolean} true = raw tool output, not substantive narrative
+ */
+function _isLikelyToolOutputPassthrough(narrative) {
+  if (!narrative || narrative.length < 80) return false;
+  // post-tool-use.sh formats entries as "cmd → output"; presence of " → " in
+  // a long narrative is near-diagnostic of raw entry-desc passthrough.
+  if (/ → /.test(narrative)) return true;
+  // Stack-trace fingerprints that never appear in curated narratives.
+  if (/\n\s+at .+:\d+:\d+/.test(narrative)) return true;
+  if (/node:internal\//.test(narrative)) return true;
+  // Raw diff output.
+  if (/(^|\n)diff --git |(^|\n)@@ -\d/.test(narrative)) return true;
+  // Test-runner failure banners.
+  if (/(^|\n)\s*FAIL\s+|AssertionError|TypeError: |SyntaxError: /.test(narrative)) return true;
+  // Absent sentence prose + multi-"; " is the buildImmediateObservation join signature.
+  const hasSentenceBreaks = /\. [A-Z]/.test(narrative);
+  const semiJoins = (narrative.match(/; /g) || []).length;
+  if (!hasSentenceBreaks && semiJoins >= 2) return true;
+  return false;
+}
+/**
+ * Write-side noise filter (P0/P2). Returns true when an observation has a
+ * LOW_SIGNAL title AND no recoverable downstream signal — caller should skip
+ * insertion.
+ *
+ * Contract: a low-signal title is kept if ANY of these carry signal:
+ *   - lesson_learned set and not 'none'
+ *   - importance >= 2
+ *   - facts has >=1 non-empty string
+ *   - narrative >= 40 chars AND not raw stderr / tool-output passthrough (P2)
+ *
+ * Opt-out: env `CLAUDE_MEM_KEEP_LOW_SIGNAL=1` disables filter (preserves
+ * pre-v2.36 behavior — every observation is inserted regardless of signal).
+ *
+ * @param {object} obs Observation shape: { title, facts, narrative, lessonLearned|lesson_learned, importance }
+ * @param {object} [env=process.env] Environment (injected for testability)
+ * @returns {boolean} true = noise, caller should drop
+ */
+export function isNoiseObservation(obs, env = process.env) {
+  if (env && env.CLAUDE_MEM_KEEP_LOW_SIGNAL === '1') return false;
+  const title = (obs && obs.title) || '';
+  if (!_LOW_SIG_RE.test(title)) return false;
+  const lesson = obs.lessonLearned ?? obs.lesson_learned;
+  if (lesson && String(lesson).trim() && String(lesson).trim().toLowerCase() !== 'none') return false;
+  if ((obs.importance ?? 1) >= 2) return false;
+  if (Array.isArray(obs.facts) &&
+      obs.facts.filter(f => typeof f === 'string' && f.trim().length > 0).length >= 1) {
+    return false;
+  }
+  const narrative = (obs.narrative || '').trim();
+  if (narrative.length >= 40 &&
+      !/^Error[: ]/i.test(narrative) &&
+      !_isLikelyToolOutputPassthrough(narrative)) {
+    return false;
+  }
+  return true;
+}

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "claude-mem-lite",
-  "version": "2.35.0",
+  "version": "2.37.0",
   "description": "Lightweight persistent memory system for Claude Code",
   "type": "module",
   "engines": {
@@ -50,6 +50,7 @@
     "lib/doctor-drift.mjs",
     "lib/stats-quality.mjs",
     "lib/low-signal-patterns.mjs",
+    "lib/citation-tracker.mjs",
     "registry.mjs",
     "registry-retriever.mjs",
     "registry-indexer.mjs",

package/schema.mjs CHANGED

@@ -13,7 +13,7 @@ export const DB_PATH = join(DB_DIR, 'claude-mem-lite.db');
 export const REGISTRY_DB_PATH = join(DB_DIR, 'resource-registry.db');
 // Increment when schema changes (tables, columns, indexes, FTS, migrations)
-export const CURRENT_SCHEMA_VERSION = 25;
+export const CURRENT_SCHEMA_VERSION = 26;
 const CORE_SCHEMA = `
   CREATE TABLE IF NOT EXISTS sdk_sessions (
@@ -112,6 +112,13 @@ const MIGRATIONS = [
   'ALTER TABLE observations ADD COLUMN superseded_by INTEGER DEFAULT NULL',
   'ALTER TABLE observations ADD COLUMN last_accessed_at INTEGER DEFAULT NULL',
   'ALTER TABLE observations ADD COLUMN optimized_at INTEGER DEFAULT NULL',
+  // v26 (P0 injection-noise): per-obs injection tracking for noise-ratio
+  // penalty. injection_count bumps only on UserPromptSubmit / hook-memory
+  // auto-injection (not on explicit recall/get/timeline — those keep bumping
+  // access_count). Pair with access_count to compute noise ratio: high
+  // injection_count + low access_count = low-signal, deprioritize.
+  'ALTER TABLE observations ADD COLUMN injection_count INTEGER NOT NULL DEFAULT 0',
+  'ALTER TABLE observations ADD COLUMN last_injected_at INTEGER DEFAULT NULL',
 ];
 /**

package/scoring-sql.mjs CHANGED

@@ -61,6 +61,44 @@ export const TYPE_QUALITY_CASE = `(
   END
 )`;
+/**
+ * Noise-ratio penalty: deprioritizes observations that get auto-injected often
+ * but rarely "used" (cited via Stop-hook citation tracker, or explicitly
+ * recalled/opened via pre-tool-recall / cmdRecall / cmdGet / cmdTimeline).
+ *
+ * Signal sources:
+ *   - injection_count: bumped ONLY on UserPromptSubmit / hook-memory auto-inject
+ *   - access_count: bumped on citation (c039352 P4), explicit recall, get, timeline
+ *
+ * Empirical thresholds (see docs/p0-injection-noise-baseline.txt, 53 transcripts):
+ *   • High-noise legitimate use (#5597 29/10=2.9x): kept at 1.0× (below tier-1)
+ *   • Moderate noise (#4352 44/9=4.89x): drops to 0.5× (tier-1 hit)
+ *   • Pure noise (#4046 14/0=inf): drops to 0.5× (tier-1; count≥10 gate protects
+ *     cold-start obs with legitimately no cites yet)
+ *   • Entrenched noise (≥20 inject, ≥5× ratio): drops to 0.2× (tier-2)
+ *
+ * Applied as: BM25 × time_decay × TYPE_QUALITY × (0.5 + 0.5·importance) × NOISE_PENALTY
+ * Note: multiplicative so ORDER BY relevance ASC (negative scores) still works —
+ * penalty shrinks magnitude, making the row less preferable.
+ *
+ * @param {string} [alias='o'] Table alias for the observations row.
+ * @returns {string} SQL CASE expression (already parenthesized).
+ */
+export function noisePenaltyClause(alias = 'o') {
+  const a = alias ? `${alias}.` : '';
+  return `(
+    CASE
+      WHEN COALESCE(${a}injection_count, 0) >= 20
+        AND COALESCE(${a}injection_count, 0) > COALESCE(${a}access_count, 0) * 5
+        THEN 0.2
+      WHEN COALESCE(${a}injection_count, 0) >= 10
+        AND COALESCE(${a}injection_count, 0) > COALESCE(${a}access_count, 0) * 3
+        THEN 0.5
+      ELSE 1.0
+    END
+  )`;
+}
 /**
  * SQL WHERE clause fragment excluding LOW_SIGNAL degraded titles — the fallback
  * titles hook-llm.mjs writes when Haiku summarization is unavailable or skipped

package/scripts/user-prompt-search.js CHANGED

@@ -4,7 +4,7 @@
 // Lightweight: only imports schema.mjs and utils.mjs, no MCP SDK
 import { ensureDb, DB_DIR, REGISTRY_DB_PATH } from '../schema.mjs';
-import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, OBS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, notLowSignalTitleClause } from '../utils.mjs';
+import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, OBS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, notLowSignalTitleClause, noisePenaltyClause } from '../utils.mjs';
 import { writeFileSync, readFileSync, existsSync, renameSync } from 'fs';
 import { join } from 'path';
 import Database from 'better-sqlite3';
@@ -87,12 +87,16 @@ function searchByFts(db, queryText, project, limit, typeFilter) {
   const now = Date.now();
   // R1: notLowSignalTitleClause() excludes hook-llm degraded titles
   // ("Modified X", "Worked on X", "Reviewed N files:", raw error logs).
+  // v26 P0: noise penalty shrinks relevance magnitude for obs with high
+  // inject:access ratio (auto-injected often, never cited/opened). See
+  // docs/p0-injection-noise-baseline.txt.
   const sql = `
     SELECT o.id, o.type, o.title, o.lesson_learned,
            ${OBS_BM25}
              * (1.0 + EXP(-0.693 * (? - o.created_at_epoch) / ${TYPE_DECAY_CASE}))
              * ${TYPE_QUALITY_CASE}
-             * (0.5 + 0.5 * COALESCE(o.importance, 1)) as relevance
+             * (0.5 + 0.5 * COALESCE(o.importance, 1))
+             * ${noisePenaltyClause('o')} as relevance
     FROM observations_fts
     JOIN observations o ON o.id = observations_fts.rowid
     WHERE observations_fts MATCH ?
@@ -460,6 +464,22 @@ async function main() {
           count: prevCount + 1,
         }));
       } catch {}
+      // v26 P0: bump injection_count for obs-based emits only (prompt-corpus
+      // rows have "P<id>" string IDs; skip those — they live in user_prompts).
+      // Per-row try/catch: observations_au trigger reinserts FTS on any UPDATE
+      // (project_non_obvious.md); an FTS corruption on one row must not abort
+      // counter bumps for other rows.
+      if (rows.length > 0) {
+        try {
+          const now = Date.now();
+          const bumpStmt = db.prepare(
+            'UPDATE observations SET injection_count = COALESCE(injection_count, 0) + 1, last_injected_at = ? WHERE id = ?'
+          );
+          for (const r of rows) {
+            try { bumpStmt.run(now, r.id); } catch {}
+          }
+        } catch {}
+      }
     }
     // ─── L1: Registry skill pointer (T4 v2.31) ──────────────────────────

package/source-files.mjs CHANGED

@@ -37,6 +37,7 @@ export const SOURCE_FILES = [
   'lib/doctor-drift.mjs',
   'lib/stats-quality.mjs',
   'lib/low-signal-patterns.mjs',
+  'lib/citation-tracker.mjs',
   // v2.32 invited-memory: memdir primitives + adopt/unadopt CLI
   'memdir.mjs',
   'adopt-content.mjs',

package/utils.mjs CHANGED

@@ -9,7 +9,7 @@ import { buildLowSignalRegex } from './lib/low-signal-patterns.mjs';
 // ─── Re-exports from extracted modules ──────────────────────────────────────
 // Backward compatibility: all consumers import from utils.mjs
-export { DECAY_HALF_LIFE_BY_TYPE, DEFAULT_DECAY_HALF_LIFE_MS, OBS_BM25, SESS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, OBS_FTS_COLUMNS, notLowSignalTitleClause } from './scoring-sql.mjs';
+export { DECAY_HALF_LIFE_BY_TYPE, DEFAULT_DECAY_HALF_LIFE_MS, OBS_BM25, SESS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, OBS_FTS_COLUMNS, notLowSignalTitleClause, noisePenaltyClause } from './scoring-sql.mjs';
 export { cjkBigrams, extractCjkSynonymTokens, extractCjkKeywords, extractCjkLikePatterns, SYNONYM_MAP, expandToken, sanitizeFtsQuery, relaxFtsQueryToOr, FTS_STOP_WORDS, CJK_COMPOUNDS } from './nlp.mjs';
 export { resolveProject, _resetProjectCache } from './project-utils.mjs';
 export { scrubSecrets, SECRET_PATTERNS } from './secret-scrub.mjs';