claude-mem-lite 2.46.0 → 2.47.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@
10
10
  "plugins": [
11
11
  {
12
12
  "name": "claude-mem-lite",
13
- "version": "2.46.0",
13
+ "version": "2.47.0",
14
14
  "source": "./",
15
15
  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall"
16
16
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.46.0",
3
+ "version": "2.47.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall",
5
5
  "author": {
6
6
  "name": "sdsrss"
package/hook-llm.mjs CHANGED
@@ -16,7 +16,7 @@ import {
16
16
  sessionFile, getSessionId, openDb, callLLM, sleep,
17
17
  } from './hook-shared.mjs';
18
18
  import { EVENT_TYPES, saveEvent } from './lib/activity.mjs';
19
- import { isNoiseObservation } from './lib/low-signal-patterns.mjs';
19
+ import { isNoiseObservation, capNoiseImportance } from './lib/low-signal-patterns.mjs';
20
20
 
21
21
  // T9: memdir-incompatible types live in the `events` table, not `observations`.
22
22
  // Set lookup is O(1) — authoritative source is lib/activity.mjs::EVENT_TYPES.
@@ -78,6 +78,16 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
78
78
  return null;
79
79
  }
80
80
 
81
+ // v2.47 P0-3: importance cap for LOW_SIGNAL titles that kept the drop gate
82
+ // open via importance>=2 but carry no lesson/facts signal. 341 rows in live
83
+ // DB had imp=3 under these conditions (99.4% noise). Cap to 1 so they
84
+ // enter the 7-day accelerated auto-compress window in hook.mjs.
85
+ const capped = capNoiseImportance(obs);
86
+ if (capped !== (obs.importance ?? 1)) {
87
+ debugLog('saveObservation', `capped imp ${obs.importance}→${capped}: ${truncate(obs.title || '', 60)}`);
88
+ obs.importance = capped;
89
+ }
90
+
81
91
  // Three-tier dedup — returns null (not throw) for dedup hits
82
92
  // Tier 1 (fast): 5-min Jaccard on titles
83
93
  const fiveMinAgo = now.getTime() - DEDUP_WINDOW_MS;
package/hook.mjs CHANGED
@@ -648,6 +648,29 @@ async function handleSessionStart() {
648
648
  if (compressed.changes > 0) {
649
649
  debugLog('DEBUG', 'session-start', `auto-compressed ${compressed.changes} old observations`);
650
650
  }
651
+
652
+ // v2.47 P0-3: accelerated compress for LOW_SIGNAL + no-signal noise.
653
+ // 7-day window instead of 30. The write-side capNoiseImportance forces
654
+ // imp=1 on these already; this just shrinks the GC latency so the
655
+ // projected 32.5% corpus reduction materializes within a week on live
656
+ // DBs instead of bleeding into the 30-day tier.
657
+ const noiseCompressAge = Date.now() - 7 * 86400000;
658
+ const noiseCompressed = db.prepare(`
659
+ UPDATE observations SET compressed_into = ${COMPRESSED_AUTO}
660
+ WHERE COALESCE(compressed_into, 0) = 0
661
+ AND importance = 1
662
+ AND (lesson_learned IS NULL OR lesson_learned = '' OR lesson_learned = 'none')
663
+ AND (facts IS NULL OR facts = '' OR facts = '[]')
664
+ AND (
665
+ title LIKE 'Modified %' OR title LIKE 'Worked on %'
666
+ OR title LIKE 'Reviewed %' OR title LIKE 'Error%'
667
+ )
668
+ AND created_at_epoch < ?
669
+ AND project = ?
670
+ `).run(noiseCompressAge, project);
671
+ if (noiseCompressed.changes > 0) {
672
+ debugLog('DEBUG', 'session-start', `auto-compressed ${noiseCompressed.changes} LOW_SIGNAL noise (7d window)`);
673
+ }
651
674
  })();
652
675
 
653
676
  // Auto-maintain: cleanup + decay + boost + purge, gated to once per 24h
@@ -113,6 +113,40 @@ function _isLikelyToolOutputPassthrough(narrative) {
113
113
  * @param {object} [env=process.env] Environment (injected for testability)
114
114
  * @returns {boolean} true = noise, caller should drop
115
115
  */
116
/**
 * v2.47 P0-3: Importance cap for LOW_SIGNAL titles that slipped through with
 * inflated importance. Complements isNoiseObservation — that one drops rows
 * entirely when narrative is also thin; this one keeps the row (useful for
 * session history) but demotes the importance so injection ranking and
 * auto-compress treat it as the noise it is.
 *
 * Production baseline (2026-04-24, projects--mem DB, 3789 obs):
 *   LOW_SIGNAL title + importance=3 → 341 rows; only 1 had lesson, 1 had facts
 *   LOW_SIGNAL title + importance=2 → 80 rows; only 5 had lesson, 6 had facts
 * 99%+ of those were Haiku-inflated noise. Cap forces imp=1 and the 7-day
 * accelerated auto-compress in hook.mjs GCs them.
 *
 * Preserves importance when ANY real signal exists:
 *   - lessonLearned OR lesson_learned is non-blank and not 'none'
 *     (case-insensitive). Both spellings are inspected, so a blank or 'none'
 *     value under one key cannot hide a real lesson stored under the other.
 *   - facts is an array with >=1 non-blank string
 * Non-LOW_SIGNAL titles (per the module-level _LOW_SIG_RE) are never capped.
 *
 * A missing importance is treated as 1. A nullish `obs` is tolerated and
 * handled like an observation with an empty title and no signal fields.
 *
 * @param {object} obs { title, facts, importance, lesson_learned|lessonLearned }
 * @returns {number} Capped importance (1 if LOW_SIGNAL+no-signal, else original)
 */
export function capNoiseImportance(obs) {
  const original = obs?.importance ?? 1;
  const title = obs?.title || '';
  if (!_LOW_SIG_RE.test(title)) return original;

  // Check each spelling independently: `a ?? b` would stop at an empty-string
  // or 'none' camelCase value and never look at the snake_case field.
  const hasLesson = [obs?.lessonLearned, obs?.lesson_learned].some((value) => {
    if (!value) return false;
    const text = String(value).trim();
    return text !== '' && text.toLowerCase() !== 'none';
  });
  if (hasLesson) return original;

  const facts = obs?.facts;
  const hasFact = Array.isArray(facts) &&
    facts.some((fact) => typeof fact === 'string' && fact.trim().length > 0);
  if (hasFact) return original;

  // Only ever lowers the value; importance already <= 1 is returned untouched.
  return original > 1 ? 1 : original;
}
149
+
116
150
  export function isNoiseObservation(obs, env = process.env) {
117
151
  if (env && env.CLAUDE_MEM_KEEP_LOW_SIGNAL === '1') return false;
118
152
  const title = (obs && obs.title) || '';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.46.0",
3
+ "version": "2.47.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code",
5
5
  "type": "module",
6
6
  "engines": {
package/schema.mjs CHANGED
@@ -21,7 +21,12 @@ export const REGISTRY_DB_PATH = join(DB_DIR, 'resource-registry.db');
21
21
  // FTS delete+reinsert cycles and amplified SQLITE_CORRUPT_VTAB blast radius
22
22
  // (project_non_obvious.md). Migration drops the old triggers once and lets
23
23
  // ensureFTS recreate them with the scoped form.
24
- export const CURRENT_SCHEMA_VERSION = 27;
24
+ //
25
+ // v28 (v2.47): observation_vectors orphan + stale-vocab cleanup. Live DBs had
26
+ // 2839/6429 (44%) orphaned rows (historic deletes during FK-OFF migrations)
27
+ // and 3282/6429 (51%) stale-vocab rows (rebuildVocabulary never pruned old
28
+ // versions before v2.47). Idempotent one-shot DELETE on ensureDb.
29
+ export const CURRENT_SCHEMA_VERSION = 28;
25
30
 
26
31
  const CORE_SCHEMA = `
27
32
  CREATE TABLE IF NOT EXISTS sdk_sessions (
@@ -402,6 +407,17 @@ export function initSchema(db) {
402
407
 
403
408
  db.exec(`CREATE INDEX IF NOT EXISTS idx_obs_vectors_version ON observation_vectors(vocab_version)`);
404
409
 
410
+ // v28 (v2.47) P0-1: one-shot cleanup of orphaned observation_vectors.
411
+ // Live DBs accumulated 44% orphans even with ON DELETE CASCADE because
412
+ // early migrations ran with `foreign_keys=OFF` and deletes skipped cascade.
413
+ // Idempotent (NOT IN is empty on a clean DB), runs once per ensureDb().
414
+ try {
415
+ db.prepare(`
416
+ DELETE FROM observation_vectors
417
+ WHERE observation_id NOT IN (SELECT id FROM observations)
418
+ `).run();
419
+ } catch { /* non-critical — table-missing path handled by earlier CREATE */ }
420
+
405
421
  // Persisted vocabulary for stable TF-IDF vector indexing
406
422
  db.exec(`
407
423
  CREATE TABLE IF NOT EXISTS vocab_state (
package/scoring-sql.mjs CHANGED
@@ -70,12 +70,17 @@ export const TYPE_QUALITY_CASE = `(
70
70
  * - injection_count: bumped ONLY on UserPromptSubmit / hook-memory auto-inject
71
71
  * - access_count: bumped on citation (c039352 P4), explicit recall, get, timeline
72
72
  *
73
- * Empirical thresholds (see docs/p0-injection-noise-baseline.txt, 53 transcripts):
74
- * High-noise legitimate use (#5597 29/10=2.9x): kept at 1.0× (below tier-1)
75
- * • Moderate noise (#4352 44/9=4.89x): drops to 0.5× (tier-1 hit)
76
- * • Pure noise (#4046 14/0=inf): drops to 0.5× (tier-1; count≥10 gate protects
77
- * cold-start obs with legitimately no cites yet)
78
- * • Entrenched noise (≥20 inject, ≥5× ratio): drops to 0.2× (tier-2)
73
+ * Empirical thresholds (v2.47 recalibration — 2026-04-24 live projects--mem,
74
+ * 3789 obs, baseline 10/20 never fired because max injection_count=9):
75
+ * • Legitimate heavy use (#5588 9/10=0.9, #7549 7/13=0.54): ratio≤3 ⇒ 1.0× (no penalty)
76
+ * • Early noise candidate (#3518 6/1=6.0): inj≥4 AND ratio>3 ⇒ 0.5× (tier-1)
77
+ * • Entrenched noise (inj≥8 AND ratio>5) ⇒ 0.2× (tier-2)
78
+ *
79
+ * Old thresholds (v26→v2.46, inj≥10/≥20) were chosen as theoretical upper bounds
80
+ * before injection_count accumulated 2 months of data — live distribution shows
81
+ * 100% of rows stayed under 10 inject events. The recalibrated gates bite the
82
+ * moderate-noise tier (first real data band) while still sparing ratio-clean
83
+ * heavy-use rows (ratio gate is the primary precision signal).
79
84
  *
80
85
  * Applied as: BM25 × time_decay × TYPE_QUALITY × (0.5 + 0.5·importance) × NOISE_PENALTY
81
86
  * Note: multiplicative so ORDER BY relevance ASC (negative scores) still works —
@@ -88,10 +93,10 @@ export function noisePenaltyClause(alias = 'o') {
88
93
  const a = alias ? `${alias}.` : '';
89
94
  return `(
90
95
  CASE
91
- WHEN COALESCE(${a}injection_count, 0) >= 20
96
+ WHEN COALESCE(${a}injection_count, 0) >= 8
92
97
  AND COALESCE(${a}injection_count, 0) > COALESCE(${a}access_count, 0) * 5
93
98
  THEN 0.2
94
- WHEN COALESCE(${a}injection_count, 0) >= 10
99
+ WHEN COALESCE(${a}injection_count, 0) >= 4
95
100
  AND COALESCE(${a}injection_count, 0) > COALESCE(${a}access_count, 0) * 3
96
101
  THEN 0.5
97
102
  ELSE 1.0
package/tfidf.mjs CHANGED
@@ -252,6 +252,13 @@ export function rebuildVocabulary(db) {
252
252
  for (const [term, entry] of vocab.terms) {
253
253
  insertStmt.run(term, entry.index, entry.idf, vocab.version, now);
254
254
  }
255
+ // v2.47 P0-1: drop observation_vectors from earlier vocab versions.
256
+ // Without this, rebuildVocabulary compounded the stale set on every call
257
+ // (live DB measured 3282/6429 = 51% stale). vectorSearch filters by
258
+ // vocab_version at query time, so stale rows were dead storage.
259
+ try {
260
+ db.prepare('DELETE FROM observation_vectors WHERE vocab_version != ?').run(vocab.version);
261
+ } catch { /* table missing on legacy DBs — non-critical */ }
255
262
  })();
256
263
 
257
264
  _vocabCache = vocab;