claude-mem-lite 2.35.0 → 2.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@
10
10
  "plugins": [
11
11
  {
12
12
  "name": "claude-mem-lite",
13
- "version": "2.35.0",
13
+ "version": "2.37.0",
14
14
  "source": "./",
15
15
  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall"
16
16
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.35.0",
3
+ "version": "2.37.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall",
5
5
  "author": {
6
6
  "name": "sdsrss"
package/hook-llm.mjs CHANGED
@@ -16,6 +16,7 @@ import {
16
16
  sessionFile, getSessionId, openDb, callLLM, sleep,
17
17
  } from './hook-shared.mjs';
18
18
  import { EVENT_TYPES, saveEvent } from './lib/activity.mjs';
19
+ import { isNoiseObservation } from './lib/low-signal-patterns.mjs';
19
20
 
20
21
  // T9: memdir-incompatible types live in the `events` table, not `observations`.
21
22
  // Set lookup is O(1) — authoritative source is lib/activity.mjs::EVENT_TYPES.
@@ -69,6 +70,14 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
69
70
  VALUES (?, ?, ?, ?, ?, 'active')
70
71
  `).run(sessionId, sessionId, project, now.toISOString(), now.getTime());
71
72
 
73
+ // P0: write-side noise block — LOW_SIGNAL title with no recoverable signal
74
+ // (no lesson, importance<2, empty facts, thin narrative) is dropped before
75
+ // dedup/MinHash/vector work. Opt-out: CLAUDE_MEM_KEEP_LOW_SIGNAL=1.
76
+ if (isNoiseObservation(obs)) {
77
+ debugLog('saveObservation', `dropped noise: ${truncate(obs.title || '', 60)}`);
78
+ return null;
79
+ }
80
+
72
81
  // Three-tier dedup — returns null (not throw) for dedup hits
73
82
  // Tier 1 (fast): 5-min Jaccard on titles
74
83
  const fiveMinAgo = now.getTime() - DEDUP_WINDOW_MS;
@@ -464,6 +473,38 @@ export function buildImmediateObservation(episode) {
464
473
  };
465
474
  }
466
475
 
476
+ // ─── Lesson retry prompt (P3) ───────────────────────────────────────────────
477
+
478
/**
 * Compose the follow-up prompt that asks the model for a single lesson when
 * the first-pass response for an episode carried no usable one.
 *
 * The prompt lists every episode action (numbered, tagged with its tool, and
 * marked "(ERROR)" when the entry is flagged as an error), appends a
 * type-specific hint — root cause for `bugfix`, tradeoff for every other
 * type — and requests a JSON reply shaped {"lesson":"..."}, or
 * {"lesson":"none"} when the work was purely mechanical.
 *
 * @param {object} episode — must expose `entries`, an array of records with
 *   `tool`, `desc` and `isError`
 * @param {object} firstPass — parsed first-pass response; `type` and `title`
 *   are read, and a missing/empty title is rendered as "untitled"
 * @returns {string} prompt
 */
export function buildLessonRetryPrompt(episode, firstPass) {
  const BUGFIX_HINT = 'For this bugfix: what was the root cause + how to spot it next time? Example: "FTS5 trigger fires on any UPDATE — wrap access_count writes in try/catch."';
  const DECISION_HINT = 'For this decision: what tradeoff was made + why? Example: "Chose single-source module over schema column because 1 drift point, not 4."';

  // One numbered line per recorded action, in episode order.
  const numberedActions = [];
  episode.entries.forEach((entry, index) => {
    const errorTag = entry.isError ? ' (ERROR)' : '';
    numberedActions.push(`${index + 1}. [${entry.tool}] ${entry.desc}${errorTag}`);
  });

  // Anything that is not a bugfix gets the decision/tradeoff hint.
  const hint = firstPass.type === 'bugfix' ? BUGFIX_HINT : DECISION_HINT;
  const shownTitle = firstPass.title || 'untitled';

  // Assembled line by line; empty strings are the blank separator lines.
  return [
    `A ${firstPass.type} episode just completed. First-pass title: "${shownTitle}".`,
    '',
    'Actions:',
    numberedActions.join('\n'),
    '',
    hint,
    '',
    'If the work was purely mechanical with no insight worth remembering, reply {"lesson":"none"}.',
    'Otherwise reply in 12-280 chars.',
    '',
    'Reply ONLY valid JSON, no markdown fences: {"lesson":"..."}',
  ].join('\n');
}
507
+
467
508
  // ─── Background: LLM Episode Extraction (Tier 2 F) ──────────────────────────
468
509
 
469
510
  export async function handleLLMEpisode() {
@@ -506,6 +547,7 @@ Action: ${e.desc}
506
547
  Error: ${e.isError ? 'yes' : 'no'}
507
548
 
508
549
  JSON: {"type":"decision|bugfix|feature|refactor|discovery|change","title":"concise ≤80 char description","narrative":"what changed, why, and outcome (2-3 sentences)","concepts":["kw1","kw2"],"facts":["fact1","fact2"],"importance":1,"lesson_learned":"non-obvious insight or 'none' if routine","search_aliases":["alt query 1","alt query 2"]}
550
+ type: pick by strongest signal. decision = explicit tradeoff / "chose X over Y because Z" / rejected an approach (e.g. "Rejected schema migration — single-source module + sync test instead"; "Heterogeneous hook events → heterogeneous context budgets"). bugfix = prior-failing path fixed with a named root cause. feature = new user-visible capability. refactor = behavior unchanged but structure improved. discovery = learned how a system works (read-heavy, no writes). change = routine edit with no new principle (default if unsure and nothing else fits).
509
551
  Facts: each MUST be (1) atomic—one claim, (2) self-contained—no pronouns, include file/function name, (3) specific—"refreshToken() in auth.ts:45 uses 1h TTL" not "handles tokens"
510
552
  importance: Be strict — default to 1. 0=pure browsing with zero learning value. 1=routine file edits, standard changes, normal workflow (MOST episodes). 2=notable ONLY if it reveals something non-obvious: error fix with discovered root cause, architectural decision with explicit tradeoff, config change with unexpected side effects. 3=critical: breaking change affecting users, security vulnerability fix, data migration. Ask yourself: "would a future session benefit from knowing this?" — if not, it's importance=1.
511
553
  lesson_learned: REQUIRED field. State what was learned that isn't obvious from reading the code. Examples: "FTS5 porter stemmer doesn't tokenize CJK — need bigram workaround", "vitest --reporter=verbose hangs on large test suites, use default reporter". If purely routine with nothing learned, write "none" (not null).
@@ -523,6 +565,7 @@ Actions (${episode.entries.length} total):
523
565
  ${actionList}
524
566
 
525
567
  JSON: {"type":"decision|bugfix|feature|refactor|discovery|change","title":"coherent ≤80 char summary","narrative":"what was done, why, and outcome (3-5 sentences)","concepts":["keyword1","keyword2"],"facts":["specific fact 1","specific fact 2"],"importance":1,"lesson_learned":"non-obvious insight or 'none' if routine","search_aliases":["alt query 1","alt query 2"]}
568
+ type: pick by strongest signal. decision = explicit tradeoff / "chose X over Y because Z" / rejected an approach (e.g. "Rejected schema migration — single-source module + sync test instead"; "Heterogeneous hook events → heterogeneous context budgets"). bugfix = prior-failing path fixed with a named root cause. feature = new user-visible capability. refactor = behavior unchanged but structure improved. discovery = learned how a system works (read-heavy, no writes). change = routine edit with no new principle (default if unsure and nothing else fits).
526
569
  Facts: each MUST be (1) atomic—one claim, (2) self-contained—no pronouns, include file/function name, (3) specific—"refreshToken() in auth.ts:45 uses 1h TTL" not "handles tokens"
527
570
  importance: Be strict — default to 1. 0=pure browsing with zero learning value. 1=routine file edits, standard changes, normal workflow (MOST episodes). 2=notable ONLY if it reveals something non-obvious: error fix with discovered root cause, architectural decision with explicit tradeoff, config change with unexpected side effects. 3=critical: breaking change affecting users, security vulnerability fix, data migration. Ask yourself: "would a future session benefit from knowing this?" — if not, it's importance=1.
528
571
  lesson_learned: REQUIRED field. State what was learned that isn't obvious from reading the code. Examples: "FTS5 porter stemmer doesn't tokenize CJK — need bigram workaround", "vitest --reporter=verbose hangs on large test suites, use default reporter". If purely routine with nothing learned, write "none" (not null).
@@ -570,7 +613,31 @@ search_aliases: 2-6 alternative search terms someone might use to find this memo
570
613
  const rawLesson = typeof parsed.lesson_learned === 'string' ? parsed.lesson_learned.trim() : '';
571
614
  const lowSignalLesson = new Set(['none', '', 'n/a', 'null', 'todo', 'tbd', 'na', '-', 'nothing', 'nil']);
572
615
  const isLessonLowSignal = lowSignalLesson.has(rawLesson.toLowerCase()) || rawLesson.length < 12;
573
- const lessonLearned = isLessonLowSignal ? null : rawLesson.slice(0, 500);
616
+ let lessonLearned = isLessonLowSignal ? null : rawLesson.slice(0, 500);
617
+
618
+ // P3: for bugfix/decision, retry once with a lesson-focused prompt.
619
+ // These types have the highest reuse value (~72.7% hit-rate vs change
620
+ // ~16.5%), and Haiku's first pass writes NULL ~70% of the time for
621
+ // curated observations. Retry budget: 1 extra callLLM per bugfix/decision
622
+ // episode. Opt-out: CLAUDE_MEM_NO_LESSON_RETRY=1.
623
+ if (isLessonLowSignal &&
624
+ (parsed.type === 'bugfix' || parsed.type === 'decision') &&
625
+ !process.env.CLAUDE_MEM_NO_LESSON_RETRY) {
626
+ try {
627
+ const retryPrompt = buildLessonRetryPrompt(episode, parsed);
628
+ const retryRaw = callLLM(retryPrompt, 10000);
629
+ if (retryRaw) {
630
+ const retry = parseJsonFromLLM(retryRaw);
631
+ const retryLesson = typeof retry?.lesson === 'string' ? retry.lesson.trim() : '';
632
+ const retryIsLow = lowSignalLesson.has(retryLesson.toLowerCase()) || retryLesson.length < 12;
633
+ if (!retryIsLow) {
634
+ lessonLearned = retryLesson.slice(0, 500);
635
+ debugLog('DEBUG', 'llm-episode', `lesson-retry: recovered ${retryLesson.length}-char lesson for ${parsed.type}`);
636
+ }
637
+ }
638
+ } catch (e) { debugCatch(e, 'lesson-retry'); }
639
+ }
640
+
574
641
  const searchAliases = Array.isArray(parsed.search_aliases)
575
642
  ? parsed.search_aliases.slice(0, 6).join(' ')
576
643
  : null;
package/hook-memory.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  // claude-mem-lite — Semantic Memory Injection
2
2
  // Search past observations for relevant memories to inject as context at user-prompt time.
3
3
 
4
- import { sanitizeFtsQuery, relaxFtsQueryToOr, debugCatch, OBS_BM25, notLowSignalTitleClause } from './utils.mjs';
4
+ import { sanitizeFtsQuery, relaxFtsQueryToOr, debugCatch, OBS_BM25, notLowSignalTitleClause, noisePenaltyClause } from './utils.mjs';
5
5
 
6
6
  const MAX_MEMORY_INJECTIONS = 3;
7
7
  const MEMORY_LOOKBACK_MS = 60 * 86400000; // 60 days
@@ -42,9 +42,14 @@ export function searchRelevantMemories(db, userPrompt, project, excludeIds = [])
42
42
  // R1: notLowSignalTitleClause() excludes hook-llm fallback titles
43
43
  // ("Modified X", "Worked on X", "Reviewed N files:", raw error logs, etc.)
44
44
  // that almost never get referenced (3.3% access rate) but compete for BM25 rank.
45
+ // v26 P0: noise_penalty is multiplied AFTER sort-BM25 so the column used
46
+ // for ORDER BY stays the penalty-adjusted `relevance` applied downstream
47
+ // in JS (scored.sort). SELECT exposes both raw BM25 (for sort) and the
48
+ // penalty factor (for the final JS score).
45
49
  const selectStmt = db.prepare(`
46
50
  SELECT o.id, o.type, o.title, o.importance, o.lesson_learned, o.project,
47
- ${OBS_BM25} as relevance
51
+ ${OBS_BM25} as relevance,
52
+ ${noisePenaltyClause('o')} as noise_penalty
48
53
  FROM observations_fts
49
54
  JOIN observations o ON o.id = observations_fts.rowid
50
55
  WHERE observations_fts MATCH ?
@@ -80,7 +85,8 @@ export function searchRelevantMemories(db, userPrompt, project, excludeIds = [])
80
85
  try {
81
86
  const crossStmt = db.prepare(`
82
87
  SELECT o.id, o.type, o.title, o.importance, o.lesson_learned, o.project,
83
- ${OBS_BM25} as relevance
88
+ ${OBS_BM25} as relevance,
89
+ ${noisePenaltyClause('o')} as noise_penalty
84
90
  FROM observations_fts
85
91
  JOIN observations o ON o.id = observations_fts.rowid
86
92
  WHERE observations_fts MATCH ?
@@ -105,12 +111,14 @@ export function searchRelevantMemories(db, userPrompt, project, excludeIds = [])
105
111
 
106
112
  // Merge and score: same-project full weight, cross-project 0.7x
107
113
  // OR-fallback results get 0.4x penalty — they matched individual words, not the full intent
114
+ // v26 P0: noise_penalty (from SQL) shrinks high-inject/low-cite rows.
108
115
  const allRows = [...rows.map(r => ({ ...r, _or: usedOrFallback })), ...crossRows.map(r => ({ ...r, _or: crossUsedOr }))];
109
116
  const scored = allRows
110
117
  .filter(r => !excludeSet.has(r.id))
111
118
  .map(r => {
112
119
  const crossProjectPenalty = r.project === project ? 1.0 : 0.7;
113
120
  const orFallbackPenalty = r._or ? 0.4 : 1.0;
121
+ const noisePenalty = typeof r.noise_penalty === 'number' ? r.noise_penalty : 1.0;
114
122
  return {
115
123
  ...r,
116
124
  score: Math.abs(r.relevance)
@@ -118,7 +126,8 @@ export function searchRelevantMemories(db, userPrompt, project, excludeIds = [])
118
126
  * (r.lesson_learned ? 1.5 : 1.0)
119
127
  * (r.importance >= 2 ? 1.0 : 0.6)
120
128
  * crossProjectPenalty
121
- * orFallbackPenalty,
129
+ * orFallbackPenalty
130
+ * noisePenalty,
122
131
  };
123
132
  })
124
133
  .sort((a, b) => b.score - a.score);
@@ -133,12 +142,19 @@ export function searchRelevantMemories(db, userPrompt, project, excludeIds = [])
133
142
  const aboveThreshold = scored.filter(r => r.score >= threshold);
134
143
  if (aboveThreshold.length === 0) return [];
135
144
 
136
- // Update access_count for injected memories
145
+ // v26 P0: bump injection_count (NOT access_count) for injected rows.
146
+ // Before v26 this was bumping access_count, which conflated auto-injection
147
+ // with real cites/recalls/opens — polluting the noise-ratio signal the
148
+ // penalty clause now depends on. access_count is reserved for explicit
149
+ // access (cmdRecall/cmdGet/cmdTimeline/pre-tool-recall/citation-tracker).
150
+ // Per-row try/catch for FTS trigger safety (project_non_obvious.md).
137
151
  const result = aboveThreshold.slice(0, MAX_MEMORY_INJECTIONS);
138
152
  const now = Date.now();
139
- const updateStmt = db.prepare('UPDATE observations SET access_count = COALESCE(access_count, 0) + 1, last_accessed_at = ? WHERE id = ?');
153
+ const bumpStmt = db.prepare(
154
+ 'UPDATE observations SET injection_count = COALESCE(injection_count, 0) + 1, last_injected_at = ? WHERE id = ?'
155
+ );
140
156
  for (const r of result) {
141
- updateStmt.run(now, r.id);
157
+ try { bumpStmt.run(now, r.id); } catch {}
142
158
  }
143
159
 
144
160
  return result;
package/hook.mjs CHANGED
@@ -42,6 +42,7 @@ import {
42
42
  spawnBackground,
43
43
  } from './hook-shared.mjs';
44
44
  import { handleLLMEpisode, handleLLMSummary, saveObservation, buildImmediateObservation } from './hook-llm.mjs';
45
+ import { extractCitationsFromTranscript, bumpCitationAccess } from './lib/citation-tracker.mjs';
45
46
  import { searchRelevantMemories } from './hook-memory.mjs';
46
47
  import { buildAndSaveHandoff, detectContinuationIntent, renderHandoffInjection, extractUnfinishedSummary } from './hook-handoff.mjs';
47
48
  import { checkForUpdate } from './hook-update.mjs';
@@ -344,12 +345,16 @@ async function handleStop() {
344
345
  // This is the stable CC identifier — the mem plugin's file-based getSessionId()
345
346
  // collides across parallel sessions for the same project (see docs/bug.txt).
346
347
  let ccSessionId = null;
348
+ let transcriptPath = null;
347
349
  try {
348
350
  const raw = await readStdin();
349
351
  const hookData = JSON.parse(raw.text);
350
352
  if (typeof hookData?.session_id === 'string' && hookData.session_id.length > 0) {
351
353
  ccSessionId = hookData.session_id;
352
354
  }
355
+ if (typeof hookData?.transcript_path === 'string' && hookData.transcript_path.length > 0) {
356
+ transcriptPath = hookData.transcript_path;
357
+ }
353
358
  } catch { /* stdin unavailable — fall back to local session id */ }
354
359
 
355
360
  // Capture session info BEFORE cleanup. All DB lookups use the mem-internal id
@@ -448,6 +453,19 @@ async function handleStop() {
448
453
  }
449
454
  }
450
455
  } catch (e) { debugCatch(e, 'handleStop-fast-summary'); }
456
+
457
+ // P4: scan transcript for `#NN` observation citations in assistant text
458
+ // and bump access_count for matched rows. Closes the loop on the "cite #NN"
459
+ // contract — before P4 this was a one-way obligation with no feedback.
460
+ try {
461
+ if (transcriptPath && !process.env.CLAUDE_MEM_NO_CITATION_TRACK) {
462
+ const ids = extractCitationsFromTranscript(transcriptPath);
463
+ if (ids.size > 0) {
464
+ const n = bumpCitationAccess(db, ids, project);
465
+ debugLog('DEBUG', 'handleStop', `citations: ${ids.size} ids scanned, ${n} obs bumped`);
466
+ }
467
+ }
468
+ } catch (e) { debugCatch(e, 'handleStop-citation-track'); }
451
469
  } finally {
452
470
  db.close();
453
471
  }
@@ -0,0 +1,82 @@
1
+ // Citation tracker (P4): scan Claude Code transcript for `#NN` observation-id
2
+ // citations in assistant text, then bulk-increment access_count for matched rows.
3
+ //
4
+ // Closes the loop on the CLAUDE.md "cite #NN" contract — before P4, citations
5
+ // were a one-way obligation with no measurable feedback. Now each honored
6
+ // citation bumps access_count, making contract compliance observable via
7
+ // mem_stats and preventing cited lessons from decaying into dead memory.
8
+ //
9
+ // FTS5 caveat (project_non_obvious.md): observations_au trigger fires on any
10
+ // column UPDATE including access_count. Per-row UPDATEs wrapped in try-catch
11
+ // to prevent SQLITE_CORRUPT_VTAB cascades from stopping the whole scan.
12
+
13
+ import { readFileSync, existsSync } from 'fs';
14
+ import { debugCatch } from '../utils.mjs';
15
+
16
// Observation citation pattern: `#` followed by 1-7 digits and ending at a
// word boundary, so `#12345678` and `#12ab` are rejected outright.
// The lookbehind rejects a `#` glued to a word character, `&`, or `/`, which
// filters HTML numeric entities (`&#39;`) and URL fragments (`page.html#12`,
// `/#12`) that would otherwise be counted as citations.
const CITATION_RE = /(?<![\w&/])#(\d{1,7})\b/g;

/**
 * Parse a Claude Code transcript (.jsonl, one JSON value per line) and
 * collect the unique observation IDs cited as `#NN` inside assistant text
 * blocks.
 *
 * Best-effort by design: a missing/unreadable file yields an empty set, and
 * any line that is not valid JSON, is not an object with `type: 'assistant'`,
 * or has no array-valued `message.content` is skipped rather than aborting
 * the scan. Only content blocks with `type: 'text'` and a string `text` are
 * searched.
 *
 * @param {string} transcriptPath Path to transcript file (.jsonl)
 * @returns {Set<number>} unique positive IDs referenced as `#NN` in assistant text
 */
export function extractCitationsFromTranscript(transcriptPath) {
  const ids = new Set();
  if (!transcriptPath || !existsSync(transcriptPath)) return ids;

  let raw;
  try { raw = readFileSync(transcriptPath, 'utf8'); } catch { return ids; }

  for (const line of raw.split('\n')) {
    if (!line.trim()) continue;

    let entry;
    try { entry = JSON.parse(line); } catch { continue; }

    // JSON.parse happily returns null / scalars for lines like `null` or `42`;
    // optional chaining keeps those from throwing and killing the whole scan.
    if (entry?.type !== 'assistant') continue;
    const content = entry.message?.content;
    if (!Array.isArray(content)) continue;

    for (const block of content) {
      // Content arrays may contain null or non-text blocks (tool calls etc.).
      if (block?.type !== 'text' || typeof block.text !== 'string') continue;

      // matchAll iterates over a private copy of the regex, so the shared
      // global pattern never carries lastIndex state between blocks.
      for (const [, digits] of block.text.matchAll(CITATION_RE)) {
        const id = Number(digits);
        // 1-7 digits always parse to an integer below 1e7; only `#0` needs excluding.
        if (id > 0) ids.add(id);
      }
    }
  }
  return ids;
}
52
+
53
/**
 * Increment `access_count` and stamp `last_accessed_at` for each cited
 * observation that belongs to `project`. Returns how many rows were updated.
 *
 * Each ID gets its own UPDATE inside a try/catch, so a failure on one row is
 * reported through debugCatch and the remaining IDs are still processed.
 * IDs that do not exist, or that belong to another project, simply match no
 * row in the WHERE clause and are not counted.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {Iterable<number>} ids
 * @param {string} project
 * @returns {number} count of rows incremented
 */
export function bumpCitationAccess(db, ids, project) {
  if (!db || !ids || !project) return 0;
  const idList = Array.isArray(ids) ? ids : [...ids];
  if (idList.length === 0) return 0;

  // COALESCE matters: in SQLite `NULL + 1` is NULL, so a row whose
  // access_count was never initialised would stay NULL forever and the
  // citation would be lost. Matches the COALESCE(..., 0) + 1 form used for
  // the other per-observation counters in this package.
  const stmt = db.prepare(`
    UPDATE observations
    SET access_count = COALESCE(access_count, 0) + 1, last_accessed_at = ?
    WHERE id = ? AND project = ?
  `);

  const now = Date.now();
  let bumped = 0;
  for (const id of idList) {
    try {
      const { changes } = stmt.run(now, id, project);
      if (changes > 0) bumped++;
    } catch (e) { debugCatch(e, `bumpCitationAccess-id-${id}`); }
  }
  return bumped;
}
@@ -58,3 +58,82 @@ export function buildNotLowSignalSql(alias = '') {
58
58
  const clauses = LOW_SIGNAL_PATTERNS.map(({ like }) => `${p}title NOT LIKE '${like}'`);
59
59
  return '(\n ' + clauses.join('\n AND ') + '\n )';
60
60
  }
61
+
62
+ // Cached singleton — isNoiseObservation is called once per observation insert.
63
+ const _LOW_SIG_RE = buildLowSignalRegex();
64
+
65
/**
 * Heuristic (P2): does this narrative look like raw tool output pasted
 * through, rather than written prose?
 *
 * Narratives shorter than 80 characters (or empty) are never classified as
 * passthrough. Longer ones are flagged when they contain any hard
 * fingerprint — a " → " arrow, a stack-trace frame, a `node:internal/`
 * path, a git diff header / hunk marker, or a test-failure / JS error
 * banner — or when they hold two or more "; " joins without a single
 * sentence break (a period, a space, then a capital letter).
 *
 * @param {string} narrative
 * @returns {boolean} true = raw tool output, not substantive narrative
 */
function _isLikelyToolOutputPassthrough(narrative) {
  const tooShortToJudge = !narrative || narrative.length < 80;
  if (tooShortToJudge) return false;

  // Any single hit is enough; none of these patterns is stateful.
  const fingerprints = [
    / → /,                                    // "cmd → output" entry format
    /\n\s+at .+:\d+:\d+/,                     // stack-trace frame
    /node:internal\//,                        // Node internals path
    /(^|\n)diff --git |(^|\n)@@ -\d/,         // raw diff header / hunk marker
    /(^|\n)\s*FAIL\s+|AssertionError|TypeError: |SyntaxError: /, // failure banners
  ];
  if (fingerprints.some((pattern) => pattern.test(narrative))) return true;

  // Several "; " joins and no prose-like sentence break: treated as a
  // joined list of entry descriptions rather than narrative text.
  const joinCount = narrative.split('; ').length - 1;
  const readsAsProse = /\. [A-Z]/.test(narrative);
  return joinCount >= 2 && !readsAsProse;
}
97
+
98
/**
 * Write-side noise filter (P0/P2). Answers "should the caller skip inserting
 * this observation?".
 *
 * An observation is noise only when its title matches the cached low-signal
 * title regex AND none of the following carries signal (checked in order,
 * stopping at the first hit):
 *   1. a lesson (`lessonLearned`, falling back to `lesson_learned`) that is
 *      non-blank and not the literal "none" (case-insensitive)
 *   2. `importance` of 2 or more (a missing importance counts as 1)
 *   3. `facts` is an array holding at least one non-blank string
 *   4. a trimmed narrative of 40+ characters that neither starts with
 *      "Error:" / "Error " (case-insensitive) nor looks like raw tool-output
 *      passthrough
 *
 * Opt-out: when `env.CLAUDE_MEM_KEEP_LOW_SIGNAL` is exactly '1' the filter is
 * disabled and the function always returns false.
 *
 * @param {object} obs Observation shape: { title, facts, narrative, lessonLearned|lesson_learned, importance }
 * @param {object} [env=process.env] Environment (injected for testability)
 * @returns {boolean} true = noise, caller should drop
 */
export function isNoiseObservation(obs, env = process.env) {
  if (env?.CLAUDE_MEM_KEEP_LOW_SIGNAL === '1') return false;

  // Only low-signal titles are candidates for dropping.
  const title = obs?.title || '';
  if (!_LOW_SIG_RE.test(title)) return false;

  // Lazy probes, evaluated in order; `some` stops at the first one that
  // finds signal, so later fields are never touched once one has matched.
  const signalProbes = [
    () => {
      const lesson = obs.lessonLearned ?? obs.lesson_learned;
      if (!lesson) return false;
      const text = String(lesson).trim();
      return text !== '' && text.toLowerCase() !== 'none';
    },
    () => (obs.importance ?? 1) >= 2,
    () => Array.isArray(obs.facts)
      && obs.facts.some((fact) => typeof fact === 'string' && fact.trim().length > 0),
    () => {
      const narrative = (obs.narrative || '').trim();
      if (narrative.length < 40) return false;
      if (/^Error[: ]/i.test(narrative)) return false;
      return !_isLikelyToolOutputPassthrough(narrative);
    },
  ];

  return !signalProbes.some((probe) => probe());
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.35.0",
3
+ "version": "2.37.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code",
5
5
  "type": "module",
6
6
  "engines": {
@@ -50,6 +50,7 @@
50
50
  "lib/doctor-drift.mjs",
51
51
  "lib/stats-quality.mjs",
52
52
  "lib/low-signal-patterns.mjs",
53
+ "lib/citation-tracker.mjs",
53
54
  "registry.mjs",
54
55
  "registry-retriever.mjs",
55
56
  "registry-indexer.mjs",
package/schema.mjs CHANGED
@@ -13,7 +13,7 @@ export const DB_PATH = join(DB_DIR, 'claude-mem-lite.db');
13
13
  export const REGISTRY_DB_PATH = join(DB_DIR, 'resource-registry.db');
14
14
 
15
15
  // Increment when schema changes (tables, columns, indexes, FTS, migrations)
16
- export const CURRENT_SCHEMA_VERSION = 25;
16
+ export const CURRENT_SCHEMA_VERSION = 26;
17
17
 
18
18
  const CORE_SCHEMA = `
19
19
  CREATE TABLE IF NOT EXISTS sdk_sessions (
@@ -112,6 +112,13 @@ const MIGRATIONS = [
112
112
  'ALTER TABLE observations ADD COLUMN superseded_by INTEGER DEFAULT NULL',
113
113
  'ALTER TABLE observations ADD COLUMN last_accessed_at INTEGER DEFAULT NULL',
114
114
  'ALTER TABLE observations ADD COLUMN optimized_at INTEGER DEFAULT NULL',
115
+ // v26 (P0 injection-noise): per-obs injection tracking for noise-ratio
116
+ // penalty. injection_count bumps only on UserPromptSubmit / hook-memory
117
+ // auto-injection (not on explicit recall/get/timeline — those keep bumping
118
+ // access_count). Pair with access_count to compute noise ratio: high
119
+ // injection_count + low access_count = low-signal, deprioritize.
120
+ 'ALTER TABLE observations ADD COLUMN injection_count INTEGER NOT NULL DEFAULT 0',
121
+ 'ALTER TABLE observations ADD COLUMN last_injected_at INTEGER DEFAULT NULL',
115
122
  ];
116
123
 
117
124
  /**
package/scoring-sql.mjs CHANGED
@@ -61,6 +61,44 @@ export const TYPE_QUALITY_CASE = `(
61
61
  END
62
62
  )`;
63
63
 
64
/**
 * Build the SQL CASE expression for the noise-ratio penalty: a multiplier
 * that shrinks the score of observations injected often but rarely accessed.
 *
 * Both counters are read through COALESCE(..., 0), so NULL counts as zero.
 * Tiers, evaluated top-down:
 *   • injection_count >= 20 AND injection_count > 5 × access_count → 0.2
 *   • injection_count >= 10 AND injection_count > 3 × access_count → 0.5
 *   • otherwise                                                    → 1.0
 *
 * Worked examples (inject/access): 29/10 → 1.0 (29 is not above 30);
 * 44/9 → 0.5 (above 27, but not above 45); 14/0 → 0.5; 9/0 → 1.0 (the
 * count >= 10 gate leaves rarely-injected rows untouched).
 *
 * The value is meant to be multiplied into a relevance score; a factor
 * below 1 shrinks the score's magnitude whatever its sign.
 *
 * @param {string} [alias='o'] Table alias for the observations row; an empty
 *   string produces unqualified column names.
 * @returns {string} SQL CASE expression (already parenthesized).
 */
export function noisePenaltyClause(alias = 'o') {
  const qualifier = alias ? `${alias}.` : '';
  // Name the two COALESCE operands once instead of repeating them per tier.
  const injected = `COALESCE(${qualifier}injection_count, 0)`;
  const accessed = `COALESCE(${qualifier}access_count, 0)`;
  return `(
    CASE
      WHEN ${injected} >= 20
        AND ${injected} > ${accessed} * 5
        THEN 0.2
      WHEN ${injected} >= 10
        AND ${injected} > ${accessed} * 3
        THEN 0.5
      ELSE 1.0
    END
  )`;
}
101
+
64
102
  /**
65
103
  * SQL WHERE clause fragment excluding LOW_SIGNAL degraded titles — the fallback
66
104
  * titles hook-llm.mjs writes when Haiku summarization is unavailable or skipped
@@ -4,7 +4,7 @@
4
4
  // Lightweight: only imports schema.mjs and utils.mjs, no MCP SDK
5
5
 
6
6
  import { ensureDb, DB_DIR, REGISTRY_DB_PATH } from '../schema.mjs';
7
- import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, OBS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, notLowSignalTitleClause } from '../utils.mjs';
7
+ import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, OBS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, notLowSignalTitleClause, noisePenaltyClause } from '../utils.mjs';
8
8
  import { writeFileSync, readFileSync, existsSync, renameSync } from 'fs';
9
9
  import { join } from 'path';
10
10
  import Database from 'better-sqlite3';
@@ -87,12 +87,16 @@ function searchByFts(db, queryText, project, limit, typeFilter) {
87
87
  const now = Date.now();
88
88
  // R1: notLowSignalTitleClause() excludes hook-llm degraded titles
89
89
  // ("Modified X", "Worked on X", "Reviewed N files:", raw error logs).
90
+ // v26 P0: noise penalty shrinks relevance magnitude for obs with high
91
+ // inject:access ratio (auto-injected often, never cited/opened). See
92
+ // docs/p0-injection-noise-baseline.txt.
90
93
  const sql = `
91
94
  SELECT o.id, o.type, o.title, o.lesson_learned,
92
95
  ${OBS_BM25}
93
96
  * (1.0 + EXP(-0.693 * (? - o.created_at_epoch) / ${TYPE_DECAY_CASE}))
94
97
  * ${TYPE_QUALITY_CASE}
95
- * (0.5 + 0.5 * COALESCE(o.importance, 1)) as relevance
98
+ * (0.5 + 0.5 * COALESCE(o.importance, 1))
99
+ * ${noisePenaltyClause('o')} as relevance
96
100
  FROM observations_fts
97
101
  JOIN observations o ON o.id = observations_fts.rowid
98
102
  WHERE observations_fts MATCH ?
@@ -460,6 +464,22 @@ async function main() {
460
464
  count: prevCount + 1,
461
465
  }));
462
466
  } catch {}
467
+ // v26 P0: bump injection_count for obs-based emits only (prompt-corpus
468
+ // rows have "P<id>" string IDs; skip those — they live in user_prompts).
469
+ // Per-row try/catch: observations_au trigger reinserts FTS on any UPDATE
470
+ // (project_non_obvious.md); an FTS corruption on one row must not abort
471
+ // counter bumps for other rows.
472
+ if (rows.length > 0) {
473
+ try {
474
+ const now = Date.now();
475
+ const bumpStmt = db.prepare(
476
+ 'UPDATE observations SET injection_count = COALESCE(injection_count, 0) + 1, last_injected_at = ? WHERE id = ?'
477
+ );
478
+ for (const r of rows) {
479
+ try { bumpStmt.run(now, r.id); } catch {}
480
+ }
481
+ } catch {}
482
+ }
463
483
  }
464
484
 
465
485
  // ─── L1: Registry skill pointer (T4 v2.31) ──────────────────────────
package/source-files.mjs CHANGED
@@ -37,6 +37,7 @@ export const SOURCE_FILES = [
37
37
  'lib/doctor-drift.mjs',
38
38
  'lib/stats-quality.mjs',
39
39
  'lib/low-signal-patterns.mjs',
40
+ 'lib/citation-tracker.mjs',
40
41
  // v2.32 invited-memory: memdir primitives + adopt/unadopt CLI
41
42
  'memdir.mjs',
42
43
  'adopt-content.mjs',
package/utils.mjs CHANGED
@@ -9,7 +9,7 @@ import { buildLowSignalRegex } from './lib/low-signal-patterns.mjs';
9
9
  // ─── Re-exports from extracted modules ──────────────────────────────────────
10
10
  // Backward compatibility: all consumers import from utils.mjs
11
11
 
12
- export { DECAY_HALF_LIFE_BY_TYPE, DEFAULT_DECAY_HALF_LIFE_MS, OBS_BM25, SESS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, OBS_FTS_COLUMNS, notLowSignalTitleClause } from './scoring-sql.mjs';
12
+ export { DECAY_HALF_LIFE_BY_TYPE, DEFAULT_DECAY_HALF_LIFE_MS, OBS_BM25, SESS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, OBS_FTS_COLUMNS, notLowSignalTitleClause, noisePenaltyClause } from './scoring-sql.mjs';
13
13
  export { cjkBigrams, extractCjkSynonymTokens, extractCjkKeywords, extractCjkLikePatterns, SYNONYM_MAP, expandToken, sanitizeFtsQuery, relaxFtsQueryToOr, FTS_STOP_WORDS, CJK_COMPOUNDS } from './nlp.mjs';
14
14
  export { resolveProject, _resetProjectCache } from './project-utils.mjs';
15
15
  export { scrubSecrets, SECRET_PATTERNS } from './secret-scrub.mjs';